Example #1
        public void DataLakeUploader_TargetExistsNoOverwrite()
        {
            var frontEnd = new InMemoryFrontEnd();

            frontEnd.CreateStream(TargetStreamPath, true, null, 0);

            //no resume, no overwrite
            var up       = CreateParameters(filePath: _smallFilePath, isResume: false);
            var uploader = new DataLakeStoreUploader(up, frontEnd);

            Assert.Throws <InvalidOperationException>(() => uploader.Execute());

            //resume, no overwrite
            up       = CreateParameters(filePath: _smallFilePath, isResume: true);
            uploader = new DataLakeStoreUploader(up, frontEnd);
            Assert.Throws <InvalidOperationException>(() => uploader.Execute());

            //resume, overwrite
            up       = CreateParameters(filePath: _smallFilePath, isResume: true, isOverwrite: true);
            uploader = new DataLakeStoreUploader(up, frontEnd);
            Assert.DoesNotThrow(() => uploader.Execute());

            //no resume, overwrite
            up       = CreateParameters(filePath: _smallFilePath, isResume: false, isOverwrite: true);
            uploader = new DataLakeStoreUploader(up, frontEnd);
            Assert.DoesNotThrow(() => uploader.Execute());
        }
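
The tests on this page rely on a CreateParameters helper that none of the excerpts include. The following is a plausible sketch of it, inferred from the UploadParameters constructions in Examples #2, #5 and #13; the defaults and the test fields it references (_largeFilePath, TargetStreamPath, ThreadCount) are assumptions, not the original code.

        // Hypothetical reconstruction of the tests' CreateParameters helper.
        private UploadParameters CreateParameters(
            string filePath         = null,
            string targetStreamPath = null,
            bool isResume           = false,
            bool isOverwrite        = false,
            bool isRecursive        = false,
            bool isDownload         = false)
        {
            // Defaults mirror the explicit UploadParameters constructions elsewhere
            // on this page; the large test file and shared target stream path are
            // assumed test fixture fields.
            return new UploadParameters(
                inputFilePath: filePath ?? _largeFilePath,
                targetStreamPath: targetStreamPath ?? TargetStreamPath,
                perFileThreadCount: ThreadCount,
                accountName: "foo",
                isResume: isResume,
                isOverwrite: isOverwrite,
                isRecursive: isRecursive,
                isDownload: isDownload,
                maxSegmentLength: 4 * 1024 * 1024,
                localMetadataLocation: Path.GetTempPath());
        }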
Example #2
        public void DataLakeUploader_UploadDownloadSingleSegment()
        {
            var frontEnd = new InMemoryFrontEnd();
            var up       = new UploadParameters(
                inputFilePath: _smallFilePath,
                targetStreamPath: "1",
                perFileThreadCount: ThreadCount,
                accountName: "foo",
                isResume: false,
                maxSegmentLength: 4 * 1024 * 1024,
                localMetadataLocation: Path.GetTempPath());

            File.WriteAllBytes(_smallFilePath, _smallFileData);

            var uploader = new DataLakeStoreUploader(up, frontEnd);

            uploader.Execute();

            VerifyFileUploadedSuccessfully(up, frontEnd, _smallFileData);
            up = new UploadParameters(
                inputFilePath: "1",
                targetStreamPath: _downloadFilePath,
                perFileThreadCount: ThreadCount,
                accountName: "foo",
                isResume: false,
                isOverwrite: true,
                isDownload: true,
                maxSegmentLength: 4 * 1024 * 1024,
                localMetadataLocation: Path.GetTempPath());

            // now download
            uploader = new DataLakeStoreUploader(up, frontEnd);
            uploader.Execute();
            VerifyFileUploadedSuccessfully(up, frontEnd, _smallFileData);
        }
Example #3
        public void DataLakeUploader_FreshUpload()
        {
            var                        frontEnd        = new InMemoryFrontEnd();
            var                        up              = CreateParameters(isResume: false);
            UploadProgress             progress        = null;
            var                        syncRoot        = new object();
            IProgress <UploadProgress> progressTracker = new Progress <UploadProgress>(
                (p) =>
            {
                lock (syncRoot)
                {
                    //it is possible that these come out of order because of race conditions (multiple threads reporting at the same time); only update if we are actually making progress
                    if (progress == null || progress.UploadedByteCount < p.UploadedByteCount)
                    {
                        progress = p;
                    }
                }
            });
            var uploader = new DataLakeStoreUploader(up, frontEnd, progressTracker);

            uploader.Execute();

            VerifyFileUploadedSuccessfully(up, frontEnd);
            VerifyProgressStatus(progress, _largeFileData.Length);
        }
Example #4
        // Upload a file
        public static void UploadFile(string srcFilePath, string destFilePath, bool force = true)
        {
            var parameters = new UploadParameters(srcFilePath, destFilePath, _adlsAccountName, isOverwrite: force);
            var frontend   = new DataLakeStoreFrontEndAdapter(_adlsAccountName, _adlsFileSystemClient);
            var uploader   = new DataLakeStoreUploader(parameters, frontend);

            uploader.Execute();
        }
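
A hypothetical call site for the wrapper above; the paths are illustrative, not from the original sample:

        // Illustrative only: copy a local file into the store, replacing any existing target.
        UploadFile(@"C:\data\sales.csv", "/raw/sales.csv", force: true);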
Example #5
        public static bool DownloadFile(DataLakeStoreFileSystemManagementClient dataLakeStoreFileSystemClient, string dlAccountName, string srcPath, string destPath, bool force = false, bool recursive = false)
        {
            var parameters = new UploadParameters(srcPath, destPath, dlAccountName, isOverwrite: force, isBinary: true, isDownload: true, perFileThreadCount: 40, concurrentFileCount: 100, isRecursive: recursive);
            var frontend   = new DataLakeStoreFrontEndAdapter(dlAccountName, dataLakeStoreFileSystemClient);
            var uploader   = new DataLakeStoreUploader(parameters, frontend);

            uploader.Execute();
            return true;
        }
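
Similarly, a hypothetical call site for this download wrapper; the account name and paths are illustrative:

        // Illustrative only: recursively download a folder from the store to a local directory.
        DownloadFile(_adlsFileSystemClient, "myadlsaccount", "/raw/sales", @"C:\downloads\sales",
                     force: true, recursive: true);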
Example #6
        // Upload the file
        public static void UploadFile(string srcFilePath, string destFilePath, bool force = true)
        {
            //TODO: 5. Configure the file upload
            var parameters = new UploadParameters(/*...*/);
            var frontend   = new DataLakeStoreFrontEndAdapter(/*...*/);
            var uploader   = new DataLakeStoreUploader(/*...*/);

            //TODO: 6. Upload the file
            uploader.Execute();
        }
Example #7
        public void DataLakeUploader_CancelUpload()
        {
            CancellationTokenSource myTokenSource = new CancellationTokenSource();
            var cancelToken    = myTokenSource.Token;
            var frontEnd       = new InMemoryFrontEnd();
            var mockedFrontend = new MockableFrontEnd(frontEnd);

            mockedFrontend.GetStreamLengthImplementation = (streamPath, isDownload) =>
            {
                // sleep for 2 seconds to allow the cancellation to actually happen
                Thread.Sleep(2000);
                return frontEnd.GetStreamLength(streamPath, isDownload);
            };

            mockedFrontend.StreamExistsImplementation = (streamPath, isDownload) =>
            {
                // sleep for 2 seconds to allow the cancellation to actually happen
                Thread.Sleep(2000);
                return frontEnd.StreamExists(streamPath, isDownload);
            };
            var                        up              = CreateParameters(isResume: false);
            UploadProgress             progress        = null;
            var                        syncRoot        = new object();
            IProgress <UploadProgress> progressTracker = new Progress <UploadProgress>(
                (p) =>
            {
                lock (syncRoot)
                {
                    //it is possible that these come out of order because of race conditions (multiple threads reporting at the same time); only update if we are actually making progress
                    if (progress == null || progress.UploadedByteCount < p.UploadedByteCount)
                    {
                        progress = p;
                    }
                }
            });
            var uploader = new DataLakeStoreUploader(up, mockedFrontend, cancelToken, progressTracker);

            Task uploadTask = Task.Run(() =>
            {
                uploader.Execute();
                Thread.Sleep(2000);
            }, cancelToken);

            myTokenSource.Cancel();
            Assert.True(cancelToken.IsCancellationRequested);

            while (uploadTask.Status == TaskStatus.Running || uploadTask.Status == TaskStatus.WaitingToRun)
            {
                Thread.Sleep(250);
            }

            // Verify that the file did not get uploaded completely.
            Assert.False(frontEnd.StreamExists(up.TargetStreamPath), "Uploaded stream exists when it should not yet have been completely created");
        }
Example #8
        public void DataLakeUploader_ResumePartialUpload()
        {
            //attempt to upload the file fully, but only allow creating 1 target stream
            var backingFrontEnd = new InMemoryFrontEnd();
            var frontEnd        = new MockableFrontEnd(backingFrontEnd);

            int createStreamCount = 0;

            frontEnd.CreateStreamImplementation = (path, overwrite, data, byteCount) =>
            {
                createStreamCount++;
                if (createStreamCount > 1)
                {
                    //we only allow 1 file to be created
                    throw new IntentionalException();
                }
                backingFrontEnd.CreateStream(path, overwrite, data, byteCount);
            };
            var up       = CreateParameters(isResume: false);
            var uploader = new DataLakeStoreUploader(up, frontEnd);

            uploader.DeleteMetadataFile();

            Assert.Throws <AggregateException>(() => uploader.Execute());
            Assert.False(frontEnd.StreamExists(up.TargetStreamPath), "Target stream should not have been created");
            Assert.Equal(1, backingFrontEnd.StreamCount);

            //resume the upload but point it to the real back-end, which doesn't throw exceptions
            up       = CreateParameters(isResume: true);
            uploader = new DataLakeStoreUploader(up, backingFrontEnd);

            try
            {
                uploader.Execute();
            }
            finally
            {
                uploader.DeleteMetadataFile();
            }

            VerifyFileUploadedSuccessfully(up, backingFrontEnd);
        }
Example #9
        public void UploadFile(string srcFilePath, string destFilePath, bool force = true)
        {
            var ok = false;

            while (!ok)
            {
                try {
                    _adlsFileSystemClient.FileSystem.Mkdirs(_adlsAccountName, destFilePath);
                    var parameters = new UploadParameters(srcFilePath, destFilePath, _adlsAccountName, isOverwrite: force);
                    var frontend   = new DataLakeStoreFrontEndAdapter(_adlsAccountName, _adlsFileSystemClient);
                    progress = new MyProgress();
                    var uploader = new DataLakeStoreUploader(parameters, frontend, progress);
                    uploader.Execute();
                    ok = true;
                } catch (Exception ex) {
                    Console.WriteLine(ex.ToString());
                    Login();
                }
            }
        }
Example #10
        public void DataLakeUploader_FreshFolderUploadDownload()
        {
            var frontEnd = new InMemoryFrontEnd();
            var up       = CreateParameters(isResume: false, isRecursive: true);
            UploadFolderProgress progress = null;
            var syncRoot = new object();
            IProgress <UploadFolderProgress> progressTracker = new Progress <UploadFolderProgress>(
                (p) =>
            {
                lock (syncRoot)
                {
                    //it is possible that these come out of order because of race conditions (multiple threads reporting at the same time); only update if we are actually making progress
                    if (progress == null || progress.UploadedByteCount < p.UploadedByteCount)
                    {
                        progress = p;
                    }
                }
            });
            var uploader = new DataLakeStoreUploader(up, frontEnd, null, progressTracker);

            uploader.Execute();

            VerifyFileUploadedSuccessfully(up, frontEnd);
            VerifyFolderProgressStatus(progress, _largeFileData.Length + (_smallFileData.Length * 2), 3);

            // now download
            var downloadFrontEnd = new MockableFrontEnd(frontEnd);

            // replace the isDirectory implementation to return true
            downloadFrontEnd.IsDirectoryImplementation = (streamPath) => { return true; };
            progress = null;
            up       = CreateParameters(isRecursive: true, isResume: false, isDownload: true, targetStreamPath: Path.GetDirectoryName(_downloadFilePath), isOverwrite: true, filePath: TargetStreamPath);
            uploader = new DataLakeStoreUploader(up, downloadFrontEnd, null, progressTracker);

            uploader.Execute();
            VerifyFileUploadedSuccessfully(up, downloadFrontEnd.BaseAdapter);
            VerifyFolderProgressStatus(progress, _largeFileData.Length + (_smallFileData.Length * 2), 3);
        }
Example #11
        public void DataLakeUploader_CancelUpload()
        {
            CancellationTokenSource myTokenSource      = new CancellationTokenSource();
            var                        cancelToken     = myTokenSource.Token;
            var                        frontEnd        = new InMemoryFrontEnd();
            var                        up              = CreateParameters(isResume: false);
            UploadProgress             progress        = null;
            var                        syncRoot        = new object();
            IProgress <UploadProgress> progressTracker = new Progress <UploadProgress>(
                (p) =>
            {
                lock (syncRoot)
                {
                    //it is possible that these come out of order because of race conditions (multiple threads reporting at the same time); only update if we are actually making progress
                    if (progress == null || progress.UploadedByteCount < p.UploadedByteCount)
                    {
                        progress = p;
                    }
                }
            });
            var uploader = new DataLakeStoreUploader(up, frontEnd, cancelToken, progressTracker);

            Task uploadTask = Task.Run(() => uploader.Execute(), cancelToken);

            Assert.True(!uploadTask.IsCompleted, "The task finished before we could cancel it");
            myTokenSource.Cancel();
            Assert.True(cancelToken.IsCancellationRequested);

            while (uploadTask.Status == TaskStatus.Running || uploadTask.Status == TaskStatus.WaitingToRun)
            {
                Thread.Sleep(250);
            }

            Assert.True(uploadTask.IsCanceled, "The task was not cancelled as expected. Actual task state: " + uploadTask.Status);

            // Verify that the file did not get uploaded completely.
            Assert.False(frontEnd.StreamExists(up.TargetStreamPath), "Uploaded stream exists when it should not yet have been completely created");
        }
Example #12
        public void DataLakeUploader_ResumeUploadWithAllMissingFiles()
        {
            //this scenario is achieved by refusing to execute the concat command on the front end for the initial upload (which will interrupt it)
            //and then resuming the upload against a fresh front-end (which obviously has no files there)

            var backingFrontEnd1 = new InMemoryFrontEnd();
            var frontEnd1        = new MockableFrontEnd(backingFrontEnd1);

            frontEnd1.ConcatenateImplementation = (target, inputs) => { throw new IntentionalException(); }; //fail the concatenation

            //attempt full upload
            var up       = CreateParameters(isResume: false);
            var uploader = new DataLakeStoreUploader(up, frontEnd1);

            uploader.DeleteMetadataFile();

            Assert.Throws <IntentionalException>(() => uploader.Execute());
            Assert.False(frontEnd1.StreamExists(up.TargetStreamPath), "Target stream should not have been created");
            Assert.True(0 < backingFrontEnd1.StreamCount, "No temporary streams seem to have been created");

            //attempt to resume the upload
            var frontEnd2 = new InMemoryFrontEnd();

            up       = CreateParameters(isResume: true);
            uploader = new DataLakeStoreUploader(up, frontEnd2);

            //at this point the metadata exists locally but there are no target files in frontEnd2
            try
            {
                uploader.Execute();
            }
            finally
            {
                uploader.DeleteMetadataFile();
            }

            VerifyFileUploadedSuccessfully(up, frontEnd2);
        }
Example #13
        public void DataLakeUploader_UploadSingleSegment()
        {
            var frontEnd     = new InMemoryFrontEnd();
            var mockFrontEnd = new MockableFrontEnd(frontEnd);

            mockFrontEnd.ConcatenateImplementation = (target, inputs) => { Assert.True(false, "Concatenate should not be called when using 1 segment"); };

            var up = new UploadParameters(
                inputFilePath: _smallFilePath,
                targetStreamPath: "1",
                threadCount: ThreadCount,
                accountName: "foo",
                isResume: false,
                maxSegmentLength: 4 * 1024 * 1024,
                localMetadataLocation: Path.GetTempPath());

            File.WriteAllBytes(_smallFilePath, _smallFileData);

            var uploader = new DataLakeStoreUploader(up, frontEnd);

            uploader.Execute();

            VerifyFileUploadedSuccessfully(up, frontEnd, _smallFileData);
        }
Example #14
        public void CopyDirectory(
            string destinationFolderPath,
            string accountName,
            string sourceFolderPath,
            CancellationToken cmdletCancellationToken,
            int folderThreadCount       = -1,
            int perFileThreadCount      = -1,
            bool recursive              = false,
            bool overwrite              = false,
            bool resume                 = false,
            bool forceBinaryOrText      = false,
            bool isBinary               = false,
            Cmdlet cmdletRunningRequest = null)
        {
            var totalBytes = GetByteCountInDirectory(sourceFolderPath, recursive);
            var totalFiles = GetFileCountInDirectory(sourceFolderPath, recursive);

            var progress = new ProgressRecord(
                uniqueActivityIdGenerator.Next(0, 10000000),
                string.Format("Copying Folder: {0}{1}. Total bytes remaining: {2}. Total files remaining: {3}",
                              sourceFolderPath, recursive ? " recursively" : string.Empty, totalBytes, totalFiles),
                "Copy in progress...")
            {
                PercentComplete = 0
            };

            UpdateProgress(progress, cmdletRunningRequest);

            var internalFolderThreads = folderThreadCount <= 0 ? Environment.ProcessorCount : folderThreadCount;
            var internalFileThreads   = perFileThreadCount <= 0 ? Environment.ProcessorCount : perFileThreadCount;

            // we need to override the default .NET value for max connections to a host to our number of threads, if necessary (otherwise we won't achieve the parallelism we want)
            var previousDefaultConnectionLimit = ServicePointManager.DefaultConnectionLimit;
            var previousExpect100 = ServicePointManager.Expect100Continue;

            try
            {
                ServicePointManager.DefaultConnectionLimit =
                    Math.Max((internalFolderThreads * internalFileThreads) + internalFolderThreads,
                             ServicePointManager.DefaultConnectionLimit);
                ServicePointManager.Expect100Continue = false;

                // On update from the Data Lake store uploader, capture the progress.
                var progressTracker = new System.Progress <UploadFolderProgress>();
                progressTracker.ProgressChanged += (s, e) =>
                {
                    lock (ConsoleOutputLock)
                    {
                        progress.PercentComplete = (int)(1.0 * e.UploadedByteCount / e.TotalFileLength * 100);
                        progress.Activity        = string.Format("Copying Folder: {0}{1}. Total bytes remaining: {2}. Total files remaining: {3}",
                                                                 sourceFolderPath, recursive ? " recursively" : string.Empty, e.TotalFileLength - e.UploadedByteCount, e.TotalFileCount - e.UploadedFileCount);
                    }
                };

                var uploadParameters = new UploadParameters(sourceFolderPath, destinationFolderPath, accountName, internalFileThreads, internalFolderThreads,
                                                            isOverwrite: overwrite, isResume: resume, isBinary: isBinary, isRecursive: recursive);
                var uploader = new DataLakeStoreUploader(uploadParameters,
                                                         new DataLakeStoreFrontEndAdapter(accountName, _client, cmdletCancellationToken),
                                                         cmdletCancellationToken,
                                                         folderProgressTracker: progressTracker);


                // Execute the uploader.
                var uploadTask = Task.Run(() =>
                {
                    cmdletCancellationToken.ThrowIfCancellationRequested();
                    uploader.Execute();
                    cmdletCancellationToken.ThrowIfCancellationRequested();
                }, cmdletCancellationToken);

                TrackUploadProgress(uploadTask, progress, cmdletRunningRequest, cmdletCancellationToken);



                if (!cmdletCancellationToken.IsCancellationRequested)
                {
                    progress.PercentComplete = 100;
                    progress.RecordType      = ProgressRecordType.Completed;
                    UpdateProgress(progress, cmdletRunningRequest);
                }
            }
            catch (Exception e)
            {
                throw new CloudException(string.Format(Properties.Resources.UploadFailedMessage, e));
            }
            finally
            {
                ServicePointManager.DefaultConnectionLimit = previousDefaultConnectionLimit;
                ServicePointManager.Expect100Continue      = previousExpect100;
            }
        }
Example #15
        public void DataLakeUploader_ResumePartialFolderUploadWithProgress()
        {
            //attempt to upload the file fully, but only allow creating 1 target stream
            var backingFrontEnd           = new InMemoryFrontEnd();
            var frontEnd                  = new MockableFrontEnd(backingFrontEnd);
            UploadFolderProgress progress = null;
            var syncRoot                  = new object();
            IProgress <UploadFolderProgress> progressTracker = new Progress <UploadFolderProgress>(
                (p) =>
            {
                lock (syncRoot)
                {
                    //it is possible that these come out of order because of race conditions (multiple threads reporting at the same time); only update if we are actually making progress
                    if (progress == null || progress.UploadedByteCount < p.UploadedByteCount)
                    {
                        progress = p;
                    }
                }
            });
            int createStreamCount = 0;

            frontEnd.CreateStreamImplementation = (path, overwrite, data, byteCount) =>
            {
                createStreamCount++;
                if (createStreamCount > 1)
                {
                    //we only allow 1 file to be created
                    throw new IntentionalException();
                }
                backingFrontEnd.CreateStream(path, overwrite, data, byteCount);
            };
            var up       = CreateParameters(isResume: false, isRecursive: true);
            var uploader = new DataLakeStoreUploader(up, frontEnd, folderProgressTracker: progressTracker);

            uploader.DeleteMetadataFile();

            // Verifies the fix for a bug where a folder upload with progress reporting hung after a failure.
            try
            {
                var uploadTask = Task.Run(() =>
                {
                    uploader.Execute();
                });

                uploadTask.Wait(TimeSpan.FromSeconds(60));
                Assert.True(false, "Folder upload did not fail after error in less than 60 seconds");
            }
            catch (Exception ex)
            {
                Assert.True(ex is AggregateException, "The exception thrown by upload was not the expected aggregate exception.");
            }

            Assert.Equal(1, frontEnd.ListDirectory(up.TargetStreamPath, false).Keys.Count);
            Assert.Equal(1, backingFrontEnd.StreamCount);

            //resume the upload but point it to the real back-end, which doesn't throw exceptions
            up       = CreateParameters(isResume: true, isRecursive: true);
            uploader = new DataLakeStoreUploader(up, backingFrontEnd, folderProgressTracker: progressTracker);

            try
            {
                var uploadTask = Task.Run(() =>
                {
                    uploader.Execute();
                });

                uploadTask.Wait(TimeSpan.FromSeconds(60));
                Assert.True(uploadTask.IsCompleted, "Folder upload did not complete after error in less than 60 seconds");
            }
            finally
            {
                uploader.DeleteMetadataFile();
            }

            VerifyFileUploadedSuccessfully(up, backingFrontEnd);
            VerifyFolderProgressStatus(progress, _largeFileData.Length + (_smallFileData.Length * 2), 3);
        }
Example #16
        public void DataLakeUploader_TargetExistsNoOverwrite()
        {
            var frontEnd = new InMemoryFrontEnd();
            frontEnd.CreateStream(TargetStreamPath, true, null, 0);

            //no resume, no overwrite
            var up = CreateParameters(filePath: _smallFilePath, isResume: false);
            var uploader = new DataLakeStoreUploader(up, frontEnd);
            Assert.Throws<InvalidOperationException>(() => uploader.Execute());

            //resume, no overwrite
            up = CreateParameters(filePath: _smallFilePath, isResume: true);
            uploader = new DataLakeStoreUploader(up, frontEnd);
            Assert.Throws<InvalidOperationException>(() => uploader.Execute());

            //resume, overwrite
            up = CreateParameters(filePath: _smallFilePath, isResume: true, isOverwrite: true);
            uploader = new DataLakeStoreUploader(up, frontEnd);
            Assert.DoesNotThrow(() => uploader.Execute());

            //no resume, overwrite
            up = CreateParameters(filePath: _smallFilePath, isResume: false, isOverwrite: true);
            uploader = new DataLakeStoreUploader(up, frontEnd);
            Assert.DoesNotThrow(() => uploader.Execute());
        }
Example #17
        public void DataLakeUploader_ResumePartialUploadDownload()
        {
            //attempt to upload the file fully, but only allow creating 1 target stream
            var backingFrontEnd = new InMemoryFrontEnd();
            var frontEnd        = new MockableFrontEnd(backingFrontEnd);

            int createStreamCount = 0;

            frontEnd.CreateStreamImplementation = (path, overwrite, data, byteCount) =>
            {
                createStreamCount++;
                if (createStreamCount > 1)
                {
                    //we only allow 1 file to be created
                    throw new IntentionalException();
                }
                backingFrontEnd.CreateStream(path, overwrite, data, byteCount);
            };

            var up       = CreateParameters(isResume: false);
            var uploader = new DataLakeStoreUploader(up, frontEnd);

            uploader.DeleteMetadataFile();

            Assert.Throws <AggregateException>(() => uploader.Execute());
            Assert.Equal(1, frontEnd.ListDirectory(up.TargetStreamPath, false).Keys.Count);
            Assert.Equal(1, backingFrontEnd.StreamCount);

            //resume the upload but point it to the real back-end, which doesn't throw exceptions
            up       = CreateParameters(isResume: true);
            uploader = new DataLakeStoreUploader(up, backingFrontEnd);

            try
            {
                uploader.Execute();
            }
            finally
            {
                uploader.DeleteMetadataFile();
            }

            VerifyFileUploadedSuccessfully(up, backingFrontEnd);

            // now download the same way.
            var frontEnd2 = new MockableFrontEnd(backingFrontEnd); // need to have data from the successful upload available.

            createStreamCount = 0;
            frontEnd2.ReadStreamImplementation = (path, data, byteCount, isDownload) =>
            {
                createStreamCount++;
                if (createStreamCount > 1)
                {
                    //we only allow 1 file to be created
                    throw new IntentionalException();
                }
                return backingFrontEnd.ReadStream(path, data, byteCount, isDownload);
            };

            up       = CreateParameters(isResume: false, isDownload: true, targetStreamPath: _downloadFilePath, isOverwrite: true, filePath: up.TargetStreamPath);
            uploader = new DataLakeStoreUploader(up, frontEnd2);

            Assert.Throws <AggregateException>(() => uploader.Execute());
            Assert.False(frontEnd2.StreamExists(up.TargetStreamPath), "Target stream should not have been created");

            // now use the good front end
            up       = CreateParameters(isResume: true, isDownload: true, targetStreamPath: _downloadFilePath, isOverwrite: true, filePath: up.InputFilePath);
            uploader = new DataLakeStoreUploader(up, backingFrontEnd);

            //resume the download but point it to the real back-end, which doesn't throw exceptions
            try
            {
                uploader.Execute();
            }
            finally
            {
                uploader.DeleteMetadataFile();
            }

            VerifyFileUploadedSuccessfully(up, backingFrontEnd);
        }
Example #18
        public void CopyDirectory(
            string destinationFolderPath,
            string accountName,
            string sourceFolderPath,
            CancellationToken cmdletCancellationToken,
            int concurrentFileCount     = 5,
            int perFileThreadCount      = 10,
            bool recursive              = false,
            bool overwrite              = false,
            bool resume                 = false,
            bool forceBinaryOrText      = false,
            bool isBinary               = false,
            bool isDownload             = false,
            Cmdlet cmdletRunningRequest = null)
        {
            var totalBytes = GetByteCountInDirectory(sourceFolderPath, recursive, isDownload, accountName);
            var totalFiles = GetFileCountInDirectory(sourceFolderPath, recursive, isDownload, accountName);

            var progress = new ProgressRecord(
                uniqueActivityIdGenerator.Next(0, 10000000),
                string.Format("Copying Folder: {0}{1}. Total bytes remaining: {2}. Total files remaining: {3}",
                              sourceFolderPath, recursive ? " recursively" : string.Empty, totalBytes, totalFiles),
                "Copy in progress...")
            {
                PercentComplete = 0
            };

            UpdateProgress(progress, cmdletRunningRequest);

            var internalFolderThreads = concurrentFileCount <= 0 ? 5 : concurrentFileCount;
            var internalFileThreads   = perFileThreadCount <= 0 ? 10 : perFileThreadCount;

            // we need to override the default .NET value for max connections to a host to our number of threads, if necessary (otherwise we won't achieve the parallelism we want)
            var previousDefaultConnectionLimit = ServicePointManager.DefaultConnectionLimit;
            var previousExpect100 = ServicePointManager.Expect100Continue;

            try
            {
                // Service client tracing is enabled; however, issue https://github.com/Azure/azure-powershell/issues/2499 is not yet resolved, so debug functionality can still affect performance negatively.
                ServicePointManager.DefaultConnectionLimit =
                    Math.Max((internalFolderThreads * internalFileThreads) + internalFolderThreads,
                             ServicePointManager.DefaultConnectionLimit);
                ServicePointManager.Expect100Continue = false;

                // On update from the Data Lake store uploader, capture the progress.
                var progressTracker = new System.Progress <UploadFolderProgress>();
                progressTracker.ProgressChanged += (s, e) =>
                {
                    lock (ConsoleOutputLock)
                    {
                        var toSet = (int)(1.0 * e.UploadedByteCount / e.TotalFileLength * 100);
                        // PowerShell defect protection: if, through some defect in
                        // our progress tracking, the number falls outside 0-100,
                        // PowerShell will crash when it is assigned; in that case,
                        // just leave the value unchanged.
                        if (toSet >= 0 && toSet <= 100)
                        {
                            progress.PercentComplete = toSet;
                        }
                        progress.Activity = string.Format("Copying Folder: {0}{1}. Total bytes remaining: {2}. Total files remaining: {3}",
                                                          sourceFolderPath, recursive ? " recursively" : string.Empty, e.TotalFileLength - e.UploadedByteCount, e.TotalFileCount - e.UploadedFileCount);
                    }
                };

                var uploadParameters = new UploadParameters(sourceFolderPath, destinationFolderPath, accountName, internalFileThreads, internalFolderThreads,
                                                            isOverwrite: overwrite, isResume: resume, isBinary: isBinary, isRecursive: recursive, isDownload: isDownload);
                var uploader = new DataLakeStoreUploader(uploadParameters,
                                                         new DataLakeStoreFrontEndAdapter(accountName, _client, cmdletCancellationToken),
                                                         cmdletCancellationToken,
                                                         folderProgressTracker: progressTracker);


                // Execute the uploader.
                var uploadTask = Task.Run(() =>
                {
                    cmdletCancellationToken.ThrowIfCancellationRequested();
                    uploader.Execute();
                    cmdletCancellationToken.ThrowIfCancellationRequested();
                }, cmdletCancellationToken);

                TrackUploadProgress(uploadTask, progress, cmdletRunningRequest, cmdletCancellationToken);



                if (!cmdletCancellationToken.IsCancellationRequested)
                {
                    progress.PercentComplete = 100;
                    progress.RecordType      = ProgressRecordType.Completed;
                    UpdateProgress(progress, cmdletRunningRequest);
                }
            }
            catch (Exception e)
            {
                throw new CloudException(string.Format(Properties.Resources.UploadFailedMessage, e));
            }
            finally
            {
                ServicePointManager.DefaultConnectionLimit = previousDefaultConnectionLimit;
                ServicePointManager.Expect100Continue      = previousExpect100;
            }
        }
Example #19
        public void CopyFile(string destinationPath, string accountName, string sourcePath,
                             CancellationToken cmdletCancellationToken, int threadCount = 10, bool overwrite = false, bool resume = false,
                             bool isBinary = false, bool isDownload = false, Cmdlet cmdletRunningRequest = null, ProgressRecord parentProgress = null)
        {
            var previousTracing = ServiceClientTracing.IsEnabled;

            try
            {
                // disable this due to performance issues during download until issue: https://github.com/Azure/azure-powershell/issues/2499 is resolved.
                ServiceClientTracing.IsEnabled = false;
                FileType ignoredType;
                if (!overwrite && ((!isDownload && TestFileOrFolderExistence(destinationPath, accountName, out ignoredType)) || (isDownload && File.Exists(destinationPath))))
                {
                    throw new InvalidOperationException(string.Format(Properties.Resources.LocalFileAlreadyExists, destinationPath));
                }

                if (threadCount < 1)
                {
                    threadCount = 10; // 10 is the default per our documentation.
                }

                // Progress bar indicator.
                var description = string.Format("Copying {0} File: {1} {2} Location: {3} for account: {4}",
                                                isDownload ? "Data Lake Store" : "Local",
                                                sourcePath,
                                                isDownload ? "to local" : "to Data Lake Store",
                                                destinationPath, accountName);
                var progress = new ProgressRecord(
                    uniqueActivityIdGenerator.Next(0, 10000000),
                    string.Format("{0} Data Lake Store Store", isDownload ? "Download from" : "Upload to"),
                    description)
                {
                    PercentComplete = 0
                };

                if (parentProgress != null)
                {
                    progress.ParentActivityId = parentProgress.ActivityId;
                }

                // On update from the Data Lake store uploader, capture the progress.
                var progressTracker = new System.Progress <UploadProgress>();
                progressTracker.ProgressChanged += (s, e) =>
                {
                    lock (ConsoleOutputLock)
                    {
                        var toSet = (int)(1.0 * e.UploadedByteCount / e.TotalFileLength * 100);
                        // PowerShell defect protection: if, through some defect in
                        // our progress tracking, the number falls outside 0-100,
                        // PowerShell will crash when it is assigned; in that case,
                        // just leave the value unchanged.
                        if (toSet >= 0 && toSet <= 100)
                        {
                            progress.PercentComplete = toSet;
                        }
                    }
                };

                var uploadParameters = new UploadParameters(sourcePath, destinationPath, accountName, threadCount,
                                                            isOverwrite: overwrite, isResume: resume, isBinary: isBinary, isDownload: isDownload);
                var uploader = new DataLakeStoreUploader(uploadParameters,
                                                         new DataLakeStoreFrontEndAdapter(accountName, _client, cmdletCancellationToken),
                                                         cmdletCancellationToken,
                                                         progressTracker);

                var previousExpect100 = ServicePointManager.Expect100Continue;
                try
                {
                    ServicePointManager.Expect100Continue = false;

                    // Execute the uploader.
                    var uploadTask = Task.Run(() =>
                    {
                        cmdletCancellationToken.ThrowIfCancellationRequested();
                        uploader.Execute();
                        cmdletCancellationToken.ThrowIfCancellationRequested();
                    }, cmdletCancellationToken);

                    TrackUploadProgress(uploadTask, progress, cmdletRunningRequest, cmdletCancellationToken);
                }
                catch (Exception e)
                {
                    throw new CloudException(string.Format(Properties.Resources.UploadFailedMessage, e));
                }
                finally
                {
                    ServicePointManager.Expect100Continue = previousExpect100;
                }
            }
            finally
            {
                ServiceClientTracing.IsEnabled = previousTracing;
            }
        }
Example #20
        public void DataLakeUploader_CancelUpload()
        {
            CancellationTokenSource myTokenSource = new CancellationTokenSource();
            var cancelToken = myTokenSource.Token;
            var frontEnd = new InMemoryFrontEnd();
            var up = CreateParameters(isResume: false);
            UploadProgress progress = null;
            var syncRoot = new object();
            IProgress<UploadProgress> progressTracker = new Progress<UploadProgress>(
                (p) =>
                {
                    lock (syncRoot)
                    {
                        //it is possible that these come out of order because of race conditions (multiple threads reporting at the same time); only update if we are actually making progress
                        if (progress == null || progress.UploadedByteCount < p.UploadedByteCount)
                        {
                            progress = p;
                        }
                    }
                });
            var uploader = new DataLakeStoreUploader(up, frontEnd, cancelToken, progressTracker);

            Task uploadTask = Task.Run(() => uploader.Execute(), cancelToken);
            Assert.True(!uploadTask.IsCompleted, "The task finished before we could cancel it");
            myTokenSource.Cancel();
            Assert.True(cancelToken.IsCancellationRequested);

            while (uploadTask.Status == TaskStatus.Running || uploadTask.Status == TaskStatus.WaitingToRun)
            {
                Thread.Sleep(250);
            }

            Assert.True(uploadTask.IsCanceled, "The task was not cancelled as expected. Actual task state: " + uploadTask.Status);

            // Verify that the file did not get uploaded completely.
            Assert.False(frontEnd.StreamExists(up.TargetStreamPath), "Uploaded stream exists when it should not yet have been completely created");
        }
Example #21
        public void DataLakeUploader_FreshUpload()
        {
            var frontEnd = new InMemoryFrontEnd();
            var up = CreateParameters(isResume: false);
            UploadProgress progress = null;
            var syncRoot = new object();
            IProgress<UploadProgress> progressTracker = new Progress<UploadProgress>(
                (p) => 
                {
                    lock (syncRoot)
                    {
                        //it is possible that these come out of order because of race conditions (multiple threads reporting at the same time); only update if we are actually making progress
                        if (progress == null || progress.UploadedByteCount < p.UploadedByteCount)
                        {
                            progress = p;
                        }
                    }
                });
            var uploader = new DataLakeStoreUploader(up, frontEnd, progressTracker);

            uploader.Execute();

            VerifyFileUploadedSuccessfully(up, frontEnd);
            VerifyProgressStatus(progress, _largeFileData.Length);
        }
Example #22
        public void DataLakeUploader_ResumeUploadWithAllMissingFiles()
        {
            //this scenario is achieved by refusing to execute the concat command on the front end for the initial upload (which will interrupt it)
            //and then resuming the upload against a fresh front-end (which obviously has no files there)
            
            var backingFrontEnd1 = new InMemoryFrontEnd();
            var frontEnd1 = new MockableFrontEnd(backingFrontEnd1);
            frontEnd1.ConcatenateImplementation = (target, inputs) => { throw new IntentionalException(); }; //fail the concatenation
            
            //attempt full upload
            var up = CreateParameters(isResume: false);
            var uploader = new DataLakeStoreUploader(up, frontEnd1);
            uploader.DeleteMetadataFile();

            Assert.Throws<IntentionalException>(() => uploader.Execute());
            Assert.False(frontEnd1.StreamExists(up.TargetStreamPath), "Target stream should not have been created");
            Assert.True(0 < backingFrontEnd1.StreamCount, "No temporary streams seem to have been created");

            //attempt to resume the upload
            var frontEnd2 = new InMemoryFrontEnd();
            up = CreateParameters(isResume: true);
            uploader = new DataLakeStoreUploader(up, frontEnd2);

            //at this point the metadata exists locally but there are no target files in frontEnd2
            try
            {
                uploader.Execute();
            }
            finally
            {
                uploader.DeleteMetadataFile();
            }

            VerifyFileUploadedSuccessfully(up, frontEnd2);
        }
Example #23
        private static void Main(string[] args)
        {
            try
            {
                Console.WriteLine("------------------------------------------------------------------------------");
                Console.WriteLine($"AdlTransfer {Assembly.GetEntryAssembly().GetName().Version} Copyright (c) 2016 Sascha Dittmann. All Rights Reserved.");
                Console.WriteLine("------------------------------------------------------------------------------");

                if (ParseArguments(args))
                {
                    return;
                }

                Console.WriteLine();
                Console.WriteLine($"Source: {_sourcePath}");
                Console.WriteLine($"Target: {_targetPath}");
                Console.WriteLine($"Account Name: {_accountName}");
                if (_verbose)
                {
                    Console.WriteLine();
                    Console.WriteLine($"Per File Thread Count: {_perFileThreadCount}");
                    Console.WriteLine($"Concurrent File Count: {_concurrentFileCount}");
                    Console.WriteLine($"Segment Length: {_maxSegmentLength.ToSizeString()}");
                    Console.WriteLine();
                    Console.WriteLine($"Overwrite: {_isOverwrite}");
                    Console.WriteLine($"Binary: {_isBinary}");
                    Console.WriteLine($"Recursive: {_isRecursive}");
                }
                Console.WriteLine();

                var credentials     = Authenticate();
                var client          = new DataLakeStoreFileSystemManagementClient(credentials);
                var frontEndAdapter = new DataLakeStoreFrontEndAdapter(_accountName, client);

                var uploadParameters = new UploadParameters(
                    _sourcePath,
                    _targetPath,
                    _accountName,
                    _perFileThreadCount,
                    _concurrentFileCount,
                    _isOverwrite,
                    _isResume,
                    _isBinary,
                    _isRecursive,
                    _isDownload,
                    _maxSegmentLength,
                    _localMetadataLocation);

                var progressTracker = new Progress <UploadProgress>();
                progressTracker.ProgressChanged += UploadProgressChanged;

                var folderProgressTracker = new Progress <UploadFolderProgress>();
                folderProgressTracker.ProgressChanged += UploadFolderProgressChanged;

                var uploader = new DataLakeStoreUploader(uploadParameters, frontEndAdapter, progressTracker, folderProgressTracker);

                Console.WriteLine($"{(_isResume ? "Resuming" : "Starting")} {(_isDownload ? "Download" : "Upload")}...");
                uploader.Execute();
                Console.WriteLine($"{(_isDownload ? "Download" : "Upload")} completed.");
            }
            catch (Exception e)
            {
                Console.ForegroundColor = ConsoleColor.Red;
                Console.Error.WriteLine(e.Message);
                Console.ResetColor();
                Environment.ExitCode = -1;
            }
        }
Example #24
        public void DataLakeUploader_ResumePartialUpload()
        {
            //attempt to upload the file fully, but only allow creating 1 target stream
            var backingFrontEnd = new InMemoryFrontEnd();
            var frontEnd = new MockableFrontEnd(backingFrontEnd);

            int createStreamCount = 0;
            frontEnd.CreateStreamImplementation = (path, overwrite, data, byteCount) =>
            {
                createStreamCount++;
                if (createStreamCount > 1)
                {
                    //we only allow 1 file to be created
                    throw new IntentionalException();
                }
                backingFrontEnd.CreateStream(path, overwrite, data, byteCount);
            };
            var up = CreateParameters(isResume: false);
            var uploader = new DataLakeStoreUploader(up, frontEnd);
            uploader.DeleteMetadataFile();

            Assert.Throws<AggregateException>(() => uploader.Execute());
            Assert.False(frontEnd.StreamExists(up.TargetStreamPath), "Target stream should not have been created");
            Assert.Equal(1, backingFrontEnd.StreamCount);

            //resume the upload but point it to the real back-end, which doesn't throw exceptions
            up = CreateParameters(isResume: true);
            uploader = new DataLakeStoreUploader(up, backingFrontEnd);

            try
            {
                uploader.Execute();
            }
            finally
            {
                uploader.DeleteMetadataFile();
            }

            VerifyFileUploadedSuccessfully(up, backingFrontEnd);
        }
Example #25
        public static bool UploadFile(DataLakeStoreFileSystemManagementClient dataLakeStoreFileSystemClient, string dlAccountName, string srcPath, string destPath, bool force = false, bool recursive = false, bool testCancel = false)
        {
            var cancelSource    = new CancellationTokenSource();
            var myToken         = cancelSource.Token;
            var parameters      = new UploadParameters(srcPath, destPath, dlAccountName, isOverwrite: force, isBinary: true, perFileThreadCount: 40, concurrentFileCount: 100, isRecursive: recursive);
            var progressTracker = new System.Progress <UploadFolderProgress>();

            progressTracker.ProgressChanged += (s, e) =>
            {
                if (e.TotalFileCount == 0)
                {
                    Console.WriteLine("we are done!");
                }
            };
            var frontend = new DataLakeStoreFrontEndAdapter(dlAccountName, dataLakeStoreFileSystemClient, myToken);
            var uploader = new DataLakeStoreUploader(parameters, frontend, myToken, folderProgressTracker: progressTracker);

            if (testCancel)
            {
                var uploadTask = Task.Run(() =>
                {
                    myToken.ThrowIfCancellationRequested();
                    uploader.Execute();
                    myToken.ThrowIfCancellationRequested();
                }, myToken);

                try
                {
                    while (!uploadTask.IsCompleted && !uploadTask.IsCanceled)
                    {
                        if (myToken.IsCancellationRequested)
                        {
                            // we are done tracking progress and will just break and let the task clean itself up.
                            try
                            {
                                uploadTask.Wait();
                            }
                            catch (OperationCanceledException)
                            {
                                if (uploadTask.IsCanceled)
                                {
                                    uploadTask.Dispose();
                                }
                            }
                            catch (AggregateException ex)
                            {
                                if (ex.InnerExceptions.OfType <OperationCanceledException>().Any())
                                {
                                    if (uploadTask.IsCanceled)
                                    {
                                        uploadTask.Dispose();
                                    }
                                }
                                else
                                {
                                    throw;
                                }
                            }
                            catch (Exception)
                            {
                                // keep this handler so unexpected exceptions can be inspected while debugging.
                            }

                            break;
                        }

                        // run for 60 seconds and then cancel out and see what happens
                        Thread.Sleep(60000);
                        cancelSource.Cancel();
                    }
                }
                catch (OperationCanceledException)
                {
                    // do nothing since we successfully cancelled out
                }
                catch (Exception)
                {
                    // left in place so unexpected failures can be inspected while debugging.
                }
            }
            else
            {
                uploader.Execute();
            }
            return true;
        }
Example #26
        public void DataLakeUploader_UploadSingleSegment()
        {
            var frontEnd = new InMemoryFrontEnd();
            var mockFrontEnd = new MockableFrontEnd(frontEnd);
            mockFrontEnd.ConcatenateImplementation = (target, inputs) => { Assert.True(false, "Concatenate should not be called when using 1 segment"); };

            var up = new UploadParameters(
                inputFilePath: _smallFilePath,
                targetStreamPath: "1",
                threadCount: ThreadCount,
                accountName: "foo",
                isResume: false,
                maxSegmentLength: 4 * 1024 * 1024,
                localMetadataLocation: Path.GetTempPath());

            File.WriteAllBytes(_smallFilePath, _smallFileData);

            var uploader = new DataLakeStoreUploader(up, frontEnd);
            uploader.Execute();

            VerifyFileUploadedSuccessfully(up, frontEnd, _smallFileData);
        }
Example #27
        public void CopyFile(string destinationPath, string accountName, string sourcePath,
            CancellationToken cmdletCancellationToken, int threadCount = -1, bool overwrite = false, bool resume = false,
            bool isBinary = false, Cmdlet cmdletRunningRequest = null, ProgressRecord parentProgress = null)
        {
            FileType ignoredType;   
            if (!overwrite && TestFileOrFolderExistence(destinationPath, accountName, out ignoredType))
            {
                throw new InvalidOperationException(string.Format(Properties.Resources.LocalFileAlreadyExists, destinationPath));    
            }

            //TODO: defect: 4259238 (located here: http://vstfrd:8080/Azure/RD/_workitems/edit/4259238) needs to be resolved or the tracingadapter work around needs to be put back in
            // default the number of threads to use to the processor count
            if (threadCount < 1)
            {
                threadCount = Environment.ProcessorCount;
            }

            // Progress bar indicator.
            var description = string.Format("Copying File: {0} to DataLakeStore Location: {1} for account: {2}",
                sourcePath, destinationPath, accountName);
            var progress = new ProgressRecord(
                uniqueActivityIdGenerator.Next(0, 10000000),
                "Upload to DataLakeStore Store",
                description)
            {
                PercentComplete = 0
            };

            if (parentProgress != null)
            {
                progress.ParentActivityId = parentProgress.ActivityId;
            }

            // On update from the Data Lake store uploader, capture the progress.
            var progressTracker = new System.Progress<UploadProgress>();
            progressTracker.ProgressChanged += (s, e) =>
            {
                lock (ConsoleOutputLock)
                {
                    progress.PercentComplete = (int)(1.0 * e.UploadedByteCount / e.TotalFileLength * 100);
                }
            };

            var uploadParameters = new UploadParameters(sourcePath, destinationPath, accountName, threadCount,
                overwrite, resume, isBinary);
            var uploader = new DataLakeStoreUploader(uploadParameters,
                new DataLakeStoreFrontEndAdapter(accountName, _client, cmdletCancellationToken),
                cmdletCancellationToken,
                progressTracker);

            var previousExpect100 = ServicePointManager.Expect100Continue;
            try
            {
                ServicePointManager.Expect100Continue = false;

                // Execute the uploader.
                var uploadTask = Task.Run(() =>
                {
                    cmdletCancellationToken.ThrowIfCancellationRequested();
                    uploader.Execute();
                    cmdletCancellationToken.ThrowIfCancellationRequested();
                }, cmdletCancellationToken);

                TrackUploadProgress(uploadTask, progress, cmdletRunningRequest, cmdletCancellationToken);
            }
            finally
            {
                ServicePointManager.Expect100Continue = previousExpect100;
            }
        }
Example #28
        public void CopyFile(string destinationPath, string accountName, string sourcePath,
                             CancellationToken cmdletCancellationToken, int threadCount = -1, bool overwrite = false, bool resume = false,
                             bool isBinary = false, Cmdlet cmdletRunningRequest = null, ProgressRecord parentProgress             = null)
        {
            var originalValue = TracingAdapter.IsEnabled;

            try
            {
                //TODO: Remove this logic when defect: 4259238 (located here: http://vstfrd:8080/Azure/RD/_workitems/edit/4259238) is resolved
                TracingAdapter.IsEnabled = false;

                // default the number of threads to use to the processor count
                if (threadCount < 1)
                {
                    threadCount = Environment.ProcessorCount;
                }

                // Progress bar indicator.
                var description = string.Format("Copying File: {0} to DataLakeStore Location: {1} for account: {2}",
                                                sourcePath, destinationPath, accountName);
                var progress = new ProgressRecord(
                    uniqueActivityIdGenerator.Next(0, 10000000),
                    "Upload to DataLakeStore Store",
                    description)
                {
                    PercentComplete = 0
                };

                if (parentProgress != null)
                {
                    progress.ParentActivityId = parentProgress.ActivityId;
                }

                // On update from the Data Lake store uploader, capture the progress.
                var progressTracker = new System.Progress <UploadProgress>();
                progressTracker.ProgressChanged += (s, e) =>
                {
                    lock (ConsoleOutputLock)
                    {
                        progress.PercentComplete = (int)(1.0 * e.UploadedByteCount / e.TotalFileLength * 100);
                    }
                };

                var uploadParameters = new UploadParameters(sourcePath, destinationPath, accountName, threadCount,
                                                            overwrite, resume, isBinary);
                var uploader = new DataLakeStoreUploader(uploadParameters,
                                                         new DataLakeStoreFrontEndAdapter(accountName, _client, cmdletCancellationToken),
                                                         cmdletCancellationToken,
                                                         progressTracker);
                // Execute the uploader.
                var uploadTask = Task.Run(() =>
                {
                    cmdletCancellationToken.ThrowIfCancellationRequested();
                    uploader.Execute();
                    cmdletCancellationToken.ThrowIfCancellationRequested();
                }, cmdletCancellationToken);

                TrackUploadProgress(uploadTask, progress, cmdletRunningRequest, cmdletCancellationToken);
            }
            finally
            {
                TracingAdapter.IsEnabled = originalValue;
            }
        }