Example 1
        /// <summary>
        /// Constructs a new TransferMetadata from the given parameters.
        /// </summary>
        /// <param name="metadataFilePath">The file path to assign to this metadata file (for saving purposes).</param>
        /// <param name="transferParameters">The parameters to use for constructing this metadata.</param>
        /// <param name="frontEnd">The front end. This is used only in the constructor for determining file length</param>
        internal TransferMetadata(string metadataFilePath, TransferParameters transferParameters, IFrontEndAdapter frontEnd, long fileSize = -1)
        {
            this.MetadataFilePath = metadataFilePath;

            this.TransferId       = Guid.NewGuid().ToString("N");
            this.InputFilePath    = transferParameters.InputFilePath;
            this.TargetStreamPath = transferParameters.TargetStreamPath;
            this.IsDownload       = transferParameters.IsDownload;

            this.SegmentStreamDirectory = GetSegmentStreamDirectory();

            this.IsBinary = transferParameters.IsBinary;

            this.FileLength = fileSize < 0 ? frontEnd.GetStreamLength(transferParameters.InputFilePath, !IsDownload) : fileSize;

            this.EncodingCodePage = transferParameters.FileEncoding.CodePage;

            // Take the smaller of the two segment counts: the one implied by the maximum segment length and the
            // one produced by the segment growth logic. This protects us against an aggressive increase in thread
            // count resulting in far more segments than is reasonable for a given file size. We also ensure that
            // each segment is at least 256 MB in size, which is the size that gives optimal storage creation in the store.
            var preliminarySegmentCount = (int)Math.Ceiling((double)this.FileLength / transferParameters.MaxSegementLength);

            this.SegmentCount  = Math.Min(preliminarySegmentCount, TransferSegmentMetadata.CalculateSegmentCount(this.FileLength));
            this.SegmentLength = TransferSegmentMetadata.CalculateSegmentLength(this.FileLength, this.SegmentCount);

            this.Segments = new TransferSegmentMetadata[this.SegmentCount];
            for (int i = 0; i < this.SegmentCount; i++)
            {
                this.Segments[i] = new TransferSegmentMetadata(i, this);
            }

            if (!transferParameters.IsBinary && this.SegmentCount > 1 && !this.IsDownload)
            {
                this.AlignSegmentsToRecordBoundaries();

                // ensure that nothing strange happened during alignment
                this.ValidateConsistency();
            }

            // initialize the status to pending, since it is not yet done.
            this.Status = SegmentTransferStatus.Pending;
        }
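To make the segment arithmetic above concrete, the following is a minimal, self-contained sketch of the same "smaller of two counts" pattern. The 256 MB segment target, the 16-segment cap, and the class name are assumptions for illustration only; they are not the library's actual CalculateSegmentCount/CalculateSegmentLength logic.

        using System;

        internal static class SegmentMathSketch
        {
            // Hypothetical constants for illustration; the real values come from
            // TransferParameters.MaxSegementLength and the segment growth logic.
            private const long MaxSegmentLength = 256L * 1024 * 1024; // assumed 256 MB target
            private const int  MaxSegmentCount  = 16;                 // assumed cap

            internal static void Main()
            {
                long fileLength = 3L * 1024 * 1024 * 1024; // a 3 GB file

                // Segment count implied by the maximum segment length (ceiling division).
                int preliminaryCount = (int)Math.Ceiling((double)fileLength / MaxSegmentLength);

                // Take the smaller of the two counts, mirroring the constructor above.
                int  segmentCount  = Math.Min(preliminaryCount, MaxSegmentCount);
                long segmentLength = fileLength / segmentCount;

                // Prints: 12 segments of ~256 MB each
                Console.WriteLine("{0} segments of ~{1} MB each", segmentCount, segmentLength / (1024 * 1024));
            }
        }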
Example 2
 /// <summary>
 /// Creates a new instance of the TransferMetadataGenerator with the given parameters and the given maximum append length.
 /// </summary>
 /// <param name="parameters">The parameters.</param>
 /// <param name="frontend">The frontend to use when generating new metadata.</param>
 public TransferMetadataGenerator(TransferParameters parameters, IFrontEndAdapter frontend)
 {
     _parameters = parameters;
     _frontend   = frontend;
 }
Example 3
        /// <summary>
        /// Uploads a folder to the specified Data Lake Store account.
        /// </summary>
        /// <param name='accountName'>
        /// The Azure Data Lake Store account to execute filesystem operations on.
        /// </param>
        /// <param name='sourcePath'>
        /// The local source folder to upload to the Data Lake Store account.
        /// </param>
        /// <param name='destinationPath'>
        /// The Data Lake Store path (starting with '/') of the directory to upload to.
        /// </param>
        /// <param name='perFileThreadCount'>
        /// The maximum number of threads to use per file during the upload. By default, this number will be computed based on folder structure and average file size.
        /// </param>
        /// <param name='concurrentFileCount'>
        /// The maximum number of files to upload at once. By default, this number will be computed based on folder structure and number of files.
        /// </param>
        /// <param name='resume'>
        /// A switch indicating if this upload is a continuation of a previous, failed upload. Default is false.
        /// </param>
        /// <param name='overwrite'>
        /// A switch indicating this upload should overwrite the contents of the target directory if it exists. Default is false, and the upload will fast fail if the target location exists.
        /// </param>
        /// <param name='uploadAsBinary'>
        /// A switch indicating this upload should treat all data as binary, which is slightly more performant but does not ensure record boundary integrity. This is recommended for large folders of mixed binary and text files or binary-only directories. Default is false.
        /// </param>
        /// <param name='recurse'>
        /// A switch indicating whether this upload should upload the source directory recursively or just the top level. Default is false, meaning only the top level will be uploaded.
        /// </param>
        /// <param name='progressTracker'>
        /// An optional delegate that can be used to track the progress of the upload operation asynchronously.
        /// </param>
        /// <param name='cancellationToken'>
        /// The cancellation token.
        /// </param>
        /// <exception cref="AdlsErrorException">
        /// Thrown when the operation returned an invalid status code.
        /// </exception>
        /// <exception cref="TaskCanceledException">
        /// Thrown when the operation takes too long to complete or if the user explicitly cancels it.
        /// </exception>
        /// <exception cref="InvalidMetadataException">
        /// Thrown when resume metadata is corrupt or not associated with the current operation.
        /// </exception>
        /// <exception cref="FileNotFoundException">
        /// Thrown when the source path cannot be found.
        /// </exception>
        /// <exception cref="InvalidOperationException">
        /// Thrown if an invalid upload is attempted or a file/folder is modified externally during the operation.
        /// </exception>
        /// <exception cref="TransferFailedException">
        /// Thrown if the transfer operation fails.
        /// </exception>
        /// <exception cref="SerializationException">
        /// Thrown when unable to deserialize the response.
        /// </exception>
        /// <exception cref="ValidationException">
        /// Thrown when a required parameter is null.
        /// </exception>
        public void UploadFolder(
            string accountName,
            string sourcePath,
            string destinationPath,
            int perFileThreadCount  = -1,
            int concurrentFileCount = -1,
            bool resume             = false,
            bool overwrite          = false,
            bool uploadAsBinary     = false,
            bool recurse            = false,
            IProgress <TransferFolderProgress> progressTracker = null,
            CancellationToken cancellationToken = default(CancellationToken))
        {
            bool   _shouldTrace  = ServiceClientTracing.IsEnabled;
            string _invocationId = null;

            if (_shouldTrace)
            {
                _invocationId = ServiceClientTracing.NextInvocationId.ToString();
                Dictionary <string, object> tracingParameters = new Dictionary <string, object>();
                tracingParameters.Add("accountName", accountName);
                tracingParameters.Add("sourcePath", sourcePath);
                tracingParameters.Add("destinationPath", destinationPath);
                tracingParameters.Add("perFileThreadCount", perFileThreadCount);
                tracingParameters.Add("concurrentFileCount", concurrentFileCount);
                tracingParameters.Add("resume", resume);
                tracingParameters.Add("overwrite", overwrite);
                tracingParameters.Add("recurse", recurse);
                tracingParameters.Add("uploadAsBinary", uploadAsBinary);
                tracingParameters.Add("progressTracker", progressTracker);
                tracingParameters.Add("cancellationToken", cancellationToken);
                ServiceClientTracing.Enter(_invocationId, this, "UploadFolder", tracingParameters);
            }

            try
            {
                var parameters = new TransferParameters(
                    inputFilePath: sourcePath,
                    targetStreamPath: destinationPath,
                    accountName: accountName,
                    perFileThreadCount: perFileThreadCount,
                    concurrentFileCount: concurrentFileCount,
                    isOverwrite: overwrite,
                    isResume: resume,
                    isRecursive: recurse,
                    isBinary: uploadAsBinary
                    );

                var transferAdapter = new DataLakeStoreFrontEndAdapter(accountName, this.Client);
                var transferClient  = new DataLakeStoreTransferClient(
                    parameters,
                    transferAdapter,
                    token: cancellationToken,
                    folderProgressTracker: progressTracker);

                transferClient.Execute();

                if (_shouldTrace)
                {
                    ServiceClientTracing.Exit(
                        _invocationId,
                        string.Format(
                            "Upload of folder to account: {0} from source location: {1}{2} to destination: {3} completed successfully.",
                            accountName,
                            sourcePath,
                            recurse ? ", recursively," : string.Empty,
                            destinationPath));
                }
            }
            catch (Exception ex)
            {
                if (_shouldTrace)
                {
                    ServiceClientTracing.Error(_invocationId, ex);
                }

                // rethrow without resetting the original stack trace
                throw;
            }
        }
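A minimal usage sketch of the UploadFolder method above. The filesystemClient variable is a hypothetical instance of whatever class hosts this method, and the account name and paths are placeholders; only the parameter names come from the signature shown above.

        // Hypothetical client instance and placeholder values; only UploadFolder's
        // parameter names are taken from the method above.
        var cts      = new CancellationTokenSource(TimeSpan.FromHours(2));
        var progress = new Progress<TransferFolderProgress>(
            p => Console.WriteLine("folder upload progress callback invoked"));

        filesystemClient.UploadFolder(
            accountName: "myadlsaccount",
            sourcePath: @"C:\data\logs",
            destinationPath: "/ingest/logs",
            overwrite: true,
            recurse: true,
            progressTracker: progress,
            cancellationToken: cts.Token);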
Example 4
        /// <summary>
        /// Constructs a new TransferFolderMetadata object from the given parameters.
        /// </summary>
        /// <param name="metadataFilePath">The file path to assign to this metadata file (for saving purposes).</param>
        /// <param name="transferParameters">The parameters to use for constructing this metadata.</param>
        /// <param name="frontend">The frontend to use when generating per file metadata.</param>
        public TransferFolderMetadata(string metadataFilePath, TransferParameters transferParameters, IFrontEndAdapter frontend)
        {
            this.MetadataFilePath = metadataFilePath;

            this.TransferId             = Guid.NewGuid().ToString("N");
            this.InputFolderPath        = transferParameters.InputFilePath;
            this.TargetStreamFolderPath = transferParameters.TargetStreamPath.TrimEnd('/');
            this.IsRecursive            = transferParameters.IsRecursive;
            // Get the list of all files in the source directory, depending on whether this transfer is recursive.
            ConcurrentQueue <string>    allFiles;
            ConcurrentQueue <Exception> exceptions = new ConcurrentQueue <Exception>();

            Dictionary <string, long> downloadFiles = new Dictionary <string, long>();

            if (transferParameters.IsDownload)
            {
                foreach (var entry in frontend.ListDirectory(transferParameters.InputFilePath, transferParameters.IsRecursive))
                {
                    downloadFiles.Add(entry.Key, entry.Value);
                }

                allFiles            = new ConcurrentQueue <string>(downloadFiles.Keys);
                this.TotalFileBytes = downloadFiles.Values.Sum();
            }
            else
            {
                allFiles = new ConcurrentQueue <string>(this.IsRecursive ? Directory.EnumerateFiles(this.InputFolderPath, "*.*", SearchOption.AllDirectories) :
                                                        Directory.EnumerateFiles(this.InputFolderPath, "*.*", SearchOption.TopDirectoryOnly));

                this.TotalFileBytes = GetByteCountFromFileList(allFiles);
            }

            this.FileCount = allFiles.Count();
            this.Files     = new TransferMetadata[this.FileCount];
            // Cap the number of metadata-generation threads at 500.
            int threadCount = Math.Min(this.FileCount, 500);
            var threads     = new List <Thread>(threadCount);

            // Start the worker threads that create per-file metadata, protecting the shared index with a lock.
            int    currentIndex       = 0;
            object indexIncrementLock = new object();

            for (int i = 0; i < threadCount; i++)
            {
                var t = new Thread(() => {
                    string curFile;
                    while (allFiles.TryDequeue(out curFile))
                    {
                        try
                        {
                            var relativeFilePath = curFile.Replace(this.InputFolderPath, "").TrimStart('\\').TrimStart('/');
                            var paramsPerFile    = new TransferParameters
                                                   (
                                curFile,
                                String.Format("{0}{1}{2}", this.TargetStreamFolderPath, transferParameters.IsDownload ? "\\" : "/", relativeFilePath),
                                transferParameters.AccountName,
                                transferParameters.PerFileThreadCount,
                                transferParameters.ConcurrentFileCount,
                                transferParameters.IsOverwrite,
                                transferParameters.IsResume,
                                transferParameters.IsBinary,
                                transferParameters.IsRecursive,
                                transferParameters.IsDownload,
                                transferParameters.MaxSegementLength,
                                transferParameters.LocalMetadataLocation
                                                   );

                            long size = -1;
                            if (transferParameters.IsDownload && downloadFiles != null)
                            {
                                size = downloadFiles[curFile];
                            }
                            var transferMetadataPath = Path.Combine(transferParameters.LocalMetadataLocation, string.Format("{0}.transfer.xml", Path.GetFileName(curFile)));
                            var eachFileMetadata     = new TransferMetadata(transferMetadataPath, paramsPerFile, frontend, size);
                            lock (indexIncrementLock)
                            {
                                this.Files[currentIndex] = eachFileMetadata;
                                currentIndex++;
                            }
                        }
                        catch (Exception e)
                        {
                            exceptions.Enqueue(e);
                        }
                    }
                });
                t.Start();
                threads.Add(t);
            }

            foreach (var t in threads)
            {
                t.Join();
            }

            if (exceptions.Count > 0)
            {
                throw new AggregateException("At least one file failed to have metadata generated", exceptions.ToArray());
            }
        }
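The constructor above fans metadata generation out across a bounded set of threads that drain a shared ConcurrentQueue and write results into a lock-protected index, collecting failures for a single AggregateException at the end. The following is a stripped-down, self-contained sketch of that same pattern, with a trivial string transformation standing in for per-file metadata generation (all names and values are illustrative):

        using System;
        using System.Collections.Concurrent;
        using System.Collections.Generic;
        using System.Linq;
        using System.Threading;

        internal static class BoundedWorkerSketch
        {
            internal static void Main()
            {
                var work    = new ConcurrentQueue<string>(Enumerable.Range(0, 100).Select(i => "file" + i));
                var results = new string[100];
                var errors  = new ConcurrentQueue<Exception>();

                int    currentIndex = 0;
                object indexLock    = new object();

                // The original caps the worker count at 500; a smaller cap keeps this sketch light.
                int threadCount = Math.Min(results.Length, 8);
                var threads     = new List<Thread>(threadCount);

                for (int i = 0; i < threadCount; i++)
                {
                    var t = new Thread(() =>
                    {
                        string item;
                        while (work.TryDequeue(out item))
                        {
                            try
                            {
                                // Stand-in for building a TransferMetadata for the dequeued file.
                                var produced = item.ToUpperInvariant();
                                lock (indexLock)
                                {
                                    results[currentIndex] = produced;
                                    currentIndex++;
                                }
                            }
                            catch (Exception e)
                            {
                                errors.Enqueue(e);
                            }
                        }
                    });
                    t.Start();
                    threads.Add(t);
                }

                foreach (var t in threads)
                {
                    t.Join();
                }

                if (errors.Count > 0)
                {
                    throw new AggregateException("At least one work item failed", errors.ToArray());
                }

                Console.WriteLine("Processed {0} items", currentIndex);
            }
        }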
Example 5
        /// <summary>
        /// Downloads a file from the specified Data Lake Store account.
        /// </summary>
        /// <param name='accountName'>
        /// The Azure Data Lake Store account to execute filesystem operations on.
        /// </param>
        /// <param name='sourcePath'>
        /// The Data Lake Store path (starting with '/') of the file to download.
        /// </param>
        /// <param name='destinationPath'>
        /// The local path to download the file to. If a directory is specified, the file name will be the same as the source file name.
        /// </param>
        /// <param name='threadCount'>
        /// The maximum number of threads to use during the download. By default, this number will be computed based on file size.
        /// </param>
        /// <param name='resume'>
        /// A switch indicating if this download is a continuation of a previous, failed download. Default is false.
        /// </param>
        /// <param name='overwrite'>
        /// A switch indicating this download should overwrite the target file if it exists. Default is false, and the download will fast fail if the target file exists.
        /// </param>
        /// <param name='progressTracker'>
        /// An optional delegate that can be used to track the progress of the download operation asynchronously.
        /// </param>
        /// <param name='cancellationToken'>
        /// The cancellation token.
        /// </param>
        /// <exception cref="AdlsErrorException">
        /// Thrown when the operation returned an invalid status code.
        /// </exception>
        /// <exception cref="TaskCanceledException">
        /// Thrown when the operation takes too long to complete or if the user explicitly cancels it.
        /// </exception>
        /// <exception cref="InvalidMetadataException">
        /// Thrown when resume metadata is corrupt or not associated with the current operation.
        /// </exception>
        /// <exception cref="FileNotFoundException">
        /// Thrown when the source path cannot be found.
        /// </exception>
        /// <exception cref="InvalidOperationException">
        /// Thrown if an invalid download is attempted or a file is modified externally during the operation.
        /// </exception>
        /// <exception cref="TransferFailedException">
        /// Thrown if the transfer operation fails.
        /// </exception>
        /// <exception cref="SerializationException">
        /// Thrown when unable to deserialize the response.
        /// </exception>
        /// <exception cref="ValidationException">
        /// Thrown when a required parameter is null.
        /// </exception>
        public void DownloadFile(
            string accountName,
            string sourcePath,
            string destinationPath,
            int threadCount = -1,
            bool resume     = false,
            bool overwrite  = false,
            IProgress <TransferProgress> progressTracker = null,
            CancellationToken cancellationToken          = default(CancellationToken))
        {
            bool   _shouldTrace  = ServiceClientTracing.IsEnabled;
            string _invocationId = null;

            if (_shouldTrace)
            {
                _invocationId = ServiceClientTracing.NextInvocationId.ToString();
                Dictionary <string, object> tracingParameters = new Dictionary <string, object>();
                tracingParameters.Add("accountName", accountName);
                tracingParameters.Add("sourcePath", sourcePath);
                tracingParameters.Add("destinationPath", destinationPath);
                tracingParameters.Add("threadCount", threadCount);
                tracingParameters.Add("resume", resume);
                tracingParameters.Add("overwrite", overwrite);
                tracingParameters.Add("progressTracker", progressTracker);
                tracingParameters.Add("cancellationToken", cancellationToken);
                ServiceClientTracing.Enter(_invocationId, this, "DownloadFile", tracingParameters);
            }

            try
            {
                var parameters = new TransferParameters(
                    inputFilePath: sourcePath,
                    targetStreamPath: destinationPath,
                    accountName: accountName,
                    perFileThreadCount: threadCount,
                    isOverwrite: overwrite,
                    isResume: resume,
                    isDownload: true
                    );

                var transferAdapter = new DataLakeStoreFrontEndAdapter(accountName, this.Client);
                var transferClient  = new DataLakeStoreTransferClient(
                    parameters,
                    transferAdapter,
                    cancellationToken,
                    progressTracker);

                transferClient.Execute();

                if (_shouldTrace)
                {
                    ServiceClientTracing.Exit(
                        _invocationId,
                        string.Format(
                            "Download of stream in account: {0} from source location: {1} to destination: {2} completed successfully.",
                            accountName,
                            sourcePath,
                            destinationPath));
                }
            }
            catch (Exception ex)
            {
                if (_shouldTrace)
                {
                    ServiceClientTracing.Error(_invocationId, ex);
                }

                // rethrow without resetting the original stack trace
                throw;
            }
        }
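A minimal usage sketch of the DownloadFile method above. As with UploadFolder, the filesystemClient variable is a hypothetical host of the method, and the account name and paths are placeholders; only the parameter names come from the signature shown above.

        // Hypothetical client instance and placeholder values; only DownloadFile's
        // parameter names are taken from the method above.
        var progress = new Progress<TransferProgress>(
            p => Console.WriteLine("file download progress callback invoked"));

        filesystemClient.DownloadFile(
            accountName: "myadlsaccount",
            sourcePath: "/ingest/logs/2017-01-01.log",
            destinationPath: @"C:\data\downloads",   // a directory: the source file name is reused
            overwrite: true,
            progressTracker: progress);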