/// <summary>
        /// Creates a new uploader for a single segment.
        /// </summary>
        /// <param name="segmentNumber">The sequence number of the segment; used as the index into the Segments of <paramref name="uploadMetadata"/>.</param>
        /// <param name="uploadMetadata">The metadata for the entire upload.</param>
        /// <param name="frontEnd">A pointer to the front end.</param>
        /// <param name="token">The cancellation token to use.</param>
        /// <param name="progressTracker">(Optional) A tracker to report progress on this segment.</param>
        /// <exception cref="System.ArgumentNullException">If <paramref name="uploadMetadata"/> is null.</exception>
        /// <exception cref="System.ArgumentOutOfRangeException">If <paramref name="segmentNumber"/> does not identify a segment in <paramref name="uploadMetadata"/>.</exception>
        public SingleSegmentUploader(int segmentNumber, UploadMetadata uploadMetadata, IFrontEndAdapter frontEnd, CancellationToken token, IProgress<SegmentUploadProgress> progressTracker = null)
        {
            // Fail fast with a descriptive exception instead of a NullReferenceException on the dereference below.
            if (uploadMetadata == null)
            {
                throw new ArgumentNullException("uploadMetadata");
            }

            // Validate the index up front so the caller learns which argument was wrong.
            var segments = uploadMetadata.Segments;
            if (segments == null || segmentNumber < 0 || segmentNumber >= segments.Length)
            {
                throw new ArgumentOutOfRangeException(
                    "segmentNumber",
                    segmentNumber,
                    "The segment number does not identify a segment in the upload metadata.");
            }

            _metadata = uploadMetadata;
            _segmentMetadata = segments[segmentNumber];

            _frontEnd = frontEnd;
            _progressTracker = progressTracker;
            _token = token;

            // The back-off retry strategy is switched on for every new uploader.
            this.UseBackOffRetryStrategy = true;
        }
        /// <summary>
        /// Creates a new uploader for a single segment.
        /// </summary>
        /// <param name="segmentNumber">The sequence number of the segment; used as the index into the Segments of <paramref name="uploadMetadata"/>.</param>
        /// <param name="uploadMetadata">The metadata for the entire upload.</param>
        /// <param name="frontEnd">A pointer to the front end.</param>
        /// <param name="token">The cancellation token to use.</param>
        /// <param name="progressTracker">(Optional) A tracker to report progress on this segment.</param>
        /// <exception cref="System.ArgumentNullException">If <paramref name="uploadMetadata"/> is null.</exception>
        /// <exception cref="System.ArgumentOutOfRangeException">If <paramref name="segmentNumber"/> does not identify a segment in <paramref name="uploadMetadata"/>.</exception>
        public SingleSegmentUploader(int segmentNumber, UploadMetadata uploadMetadata, IFrontEndAdapter frontEnd, CancellationToken token, IProgress<SegmentUploadProgress> progressTracker = null)
        {
            // Reject a missing metadata object explicitly rather than letting the dereference below throw NullReferenceException.
            if (uploadMetadata == null)
            {
                throw new ArgumentNullException("uploadMetadata");
            }

            // Reject an index that cannot be resolved to a segment before any field is assigned.
            var segments = uploadMetadata.Segments;
            if (segments == null || segmentNumber < 0 || segmentNumber >= segments.Length)
            {
                throw new ArgumentOutOfRangeException(
                    "segmentNumber",
                    segmentNumber,
                    "The segment number does not identify a segment in the upload metadata.");
            }

            _metadata        = uploadMetadata;
            _segmentMetadata = segments[segmentNumber];

            _frontEnd                    = frontEnd;
            _progressTracker             = progressTracker;
            _token                       = token;

            // The back-off retry strategy is switched on for every new uploader.
            this.UseBackOffRetryStrategy = true;
        }
        /// <summary>
        /// Calculates the value by which we'd need to adjust the length of the given segment, by searching for the nearest newline around its end (before and after),
        /// and returning the distance to it (which can be positive, if after, or negative, if before).
        /// </summary>
        /// <param name="segment">The segment whose end offset (Offset + Length) is the reference point around which newlines are searched.</param>
        /// <param name="stream">The file stream from which the bytes surrounding the reference point are read.</param>
        /// <returns>The difference between the position of the closest newline and the middle point of the bytes that were read.</returns>
        /// <exception cref="Microsoft.Azure.Management.DataLake.StoreUploader.UploadFailedException">If no record boundary could be located on either side of the segment end offset within the allowed distance.</exception>
        private int DetermineLengthAdjustment(UploadSegmentMetadata segment, FileStream stream)
        {
            long referenceFileOffset = segment.Offset + segment.Length;

            byte[] buffer = new byte[_maxAppendLength];

            //read 2MB before the segment boundary and 2MB after (for a total of 4MB = max append length)
            int bytesRead = ReadIntoBufferAroundReference(stream, buffer, referenceFileOffset);

            if (bytesRead > 0)
            {
                int middlePoint = bytesRead / 2;

                //search backwards for a newline, starting just past the middle point
                int newLinePosBefore = StringExtensions.FindNewline(buffer, middlePoint + 1, middlePoint + 1, true);

                //in some cases, we may have a newline that is 2 characters long, and it occurs exactly on the midpoint, which means we won't be able to find its end.
                //see if that's the case, and then search for a new candidate before it.
                if (newLinePosBefore == middlePoint + 1 && buffer[newLinePosBefore] == (byte)'\r')
                {
                    int newNewLinePosBefore = StringExtensions.FindNewline(buffer, middlePoint, middlePoint, true);
                    if (newNewLinePosBefore >= 0)
                    {
                        newLinePosBefore = newNewLinePosBefore;
                    }
                }

                //search forwards for a newline, starting at the middle point
                int newLinePosAfter = StringExtensions.FindNewline(buffer, middlePoint, middlePoint, false);

                //a '\r' in the very last slot of the buffer is discarded when a candidate before the middle point exists
                //(presumably because its possible '\n' partner lies beyond the buffer - TODO confirm)
                if (newLinePosAfter == buffer.Length - 1 && buffer[newLinePosAfter] == (byte)'\r' && newLinePosBefore >= 0)
                {
                    newLinePosAfter = -1;
                }

                int closestNewLinePos = FindClosestToCenter(newLinePosBefore, newLinePosAfter, middlePoint);

                //middle point of the buffer corresponds to the reference file offset, so all we need to do is return the difference between the closest newline and the center of the buffer
                if (closestNewLinePos >= 0)
                {
                    return closestNewLinePos - middlePoint;
                }
            }

            //if we get this far, we were unable to find a record boundary within our limits => fail the upload
            //{0} is the search distance on each side (half the window); {3} is the whole window, i.e. the minimum size of a record with no boundary inside it
            throw new UploadFailedException(
                      string.Format(
                          "Unable to locate a record boundary within {0}MB on either side of segment {1} (offset {2}). This means the record at that offset is larger than {3}MB.",
                          _maxAppendLength / 1024 / 1024 / 2,
                          segment.SegmentNumber,
                          segment.Offset,
                          _maxAppendLength / 1024 / 1024));
        }
        /// <summary>
        /// Builds the metadata describing a new upload from the supplied parameters.
        /// </summary>
        /// <param name="metadataFilePath">The file path to assign to this metadata file (for saving purposes).</param>
        /// <param name="uploadParameters">The parameters from which the input path, target path, binary flag and maximum segment length are taken.</param>
        internal UploadMetadata(string metadataFilePath, UploadParameters uploadParameters)
        {
            MetadataFilePath = metadataFilePath;

            UploadId = Guid.NewGuid().ToString("N");
            InputFilePath = uploadParameters.InputFilePath;
            TargetStreamPath = uploadParameters.TargetStreamPath;

            string parentDirectory;
            var targetName = SplitTargetStreamPathByName(out parentDirectory);

            if (!string.IsNullOrEmpty(parentDirectory))
            {
                // the target lives in a sub folder: the segments folder is placed inside that same folder
                SegmentStreamDirectory = string.Format("{0}/{1}.segments.{2}", parentDirectory, targetName, Guid.NewGuid());
            }
            else
            {
                // the target lives at the root: the segments folder is placed directly under the root
                SegmentStreamDirectory = string.Format("/{0}.segments.{1}", targetName, Guid.NewGuid());
            }

            IsBinary = uploadParameters.IsBinary;

            // read the length of the local input file once and reuse it below
            long inputLength = new FileInfo(uploadParameters.InputFilePath).Length;
            FileLength = inputLength;

            // Two candidate segment counts are computed and the smaller one wins:
            //  - the count implied by the caller's maximum segment length, and
            //  - the count produced by the segment growth logic.
            // This protects us against an aggressive increase of thread count resulting in far more segments
            // than is reasonable for a given file size. We also ensure that each segment is at least 256mb in size,
            // which is the size that ensures we have the optimal storage creation in the store.
            var countFromMaxLength = (int)Math.Ceiling((double)inputLength / uploadParameters.MaxSegementLength);
            var countFromGrowthLogic = UploadSegmentMetadata.CalculateSegmentCount(inputLength);

            SegmentCount = Math.Min(countFromMaxLength, countFromGrowthLogic);
            SegmentLength = UploadSegmentMetadata.CalculateSegmentLength(inputLength, SegmentCount);

            // create one metadata entry per segment, in sequence order
            Segments = new UploadSegmentMetadata[SegmentCount];
            for (int segmentNumber = 0; segmentNumber < SegmentCount; segmentNumber++)
            {
                Segments[segmentNumber] = new UploadSegmentMetadata(segmentNumber, this);
            }
        }
        // Example #5
        // 0
        /// <summary>
        /// Builds the metadata describing a new transfer from the supplied parameters.
        /// </summary>
        /// <param name="metadataFilePath">The file path to assign to this metadata file (for saving purposes).</param>
        /// <param name="uploadParameters">The parameters to use for constructing this metadata.</param>
        /// <param name="frontEnd">The front end. This is used only in the constructor for determining file length.</param>
        /// <param name="fileSize">(Optional) The length of the input file. When negative (the default), the length is obtained from <paramref name="frontEnd"/> instead.</param>
        internal UploadMetadata(string metadataFilePath, UploadParameters uploadParameters, IFrontEndAdapter frontEnd, long fileSize = -1)
        {
            MetadataFilePath = metadataFilePath;

            UploadId = Guid.NewGuid().ToString("N");
            InputFilePath = uploadParameters.InputFilePath;
            TargetStreamPath = uploadParameters.TargetStreamPath;
            IsDownload = uploadParameters.IsDownload;

            SegmentStreamDirectory = GetSegmentStreamDirectory();

            IsBinary = uploadParameters.IsBinary;

            // a non-negative fileSize is taken as-is; otherwise the front end is asked for the stream length
            if (fileSize >= 0)
            {
                FileLength = fileSize;
            }
            else
            {
                FileLength = frontEnd.GetStreamLength(uploadParameters.InputFilePath, !IsDownload);
            }

            EncodingCodePage = uploadParameters.FileEncoding.CodePage;

            // Two candidate segment counts are computed and the smaller one wins:
            //  - the count implied by the caller's maximum segment length, and
            //  - the count produced by the segment growth logic.
            // This protects us against an aggressive increase of thread count resulting in far more segments
            // than is reasonable for a given file size. We also ensure that each segment is at least 256mb in size,
            // which is the size that ensures we have the optimal storage creation in the store.
            var countFromMaxLength = (int)Math.Ceiling((double)FileLength / uploadParameters.MaxSegementLength);
            var countFromGrowthLogic = UploadSegmentMetadata.CalculateSegmentCount(FileLength);

            SegmentCount = Math.Min(countFromMaxLength, countFromGrowthLogic);
            SegmentLength = UploadSegmentMetadata.CalculateSegmentLength(FileLength, SegmentCount);

            // create one metadata entry per segment, in sequence order
            Segments = new UploadSegmentMetadata[SegmentCount];
            for (int segmentNumber = 0; segmentNumber < SegmentCount; segmentNumber++)
            {
                Segments[segmentNumber] = new UploadSegmentMetadata(segmentNumber, this);
            }

            // alignment is skipped for binary input, for a single segment (or none), and for downloads
            bool skipAlignment = uploadParameters.IsBinary || SegmentCount <= 1 || IsDownload;
            if (!skipAlignment)
            {
                AlignSegmentsToRecordBoundaries();

                // ensure that nothing strange happened during alignment
                ValidateConsistency();
            }

            // the transfer has not started yet, so its status begins as pending
            Status = SegmentUploadStatus.Pending;
        }
        /// <summary>
        /// Calculates the value by which we'd need to adjust the length of the given segment, by searching for the nearest newline around its end (before and after),
        /// and returning the distance to it (which can be positive, if after, or negative, if before).
        /// </summary>
        /// <param name="segment">The segment whose end offset (Offset + Length) is the reference point around which newlines are searched.</param>
        /// <param name="stream">The file stream from which the bytes surrounding the reference point are read.</param>
        /// <returns>The difference between the position of the closest newline and the middle point of the bytes that were read.</returns>
        /// <exception cref="Microsoft.Azure.Management.DataLake.StoreUploader.UploadFailedException">If no record boundary could be located on either side of the segment end offset within the allowed distance.</exception>
        private int DetermineLengthAdjustment(UploadSegmentMetadata segment, FileStream stream)
        {
            long referenceFileOffset = segment.Offset + segment.Length;
            byte[] buffer = new byte[_maxAppendLength];

            //read 2MB before the segment boundary and 2MB after (for a total of 4MB = max append length)
            int bytesRead = ReadIntoBufferAroundReference(stream, buffer, referenceFileOffset);
            if (bytesRead > 0)
            {
                int middlePoint = bytesRead / 2;

                //search backwards for a newline, starting just past the middle point
                int newLinePosBefore = StringExtensions.FindNewline(buffer, middlePoint + 1, middlePoint + 1, true);

                //in some cases, we may have a newline that is 2 characters long, and it occurs exactly on the midpoint, which means we won't be able to find its end.
                //see if that's the case, and then search for a new candidate before it.
                if (newLinePosBefore == middlePoint + 1 && buffer[newLinePosBefore] == (byte)'\r')
                {
                    int newNewLinePosBefore = StringExtensions.FindNewline(buffer, middlePoint, middlePoint, true);
                    if (newNewLinePosBefore >= 0)
                    {
                        newLinePosBefore = newNewLinePosBefore;
                    }
                }

                //search forwards for a newline, starting at the middle point
                int newLinePosAfter = StringExtensions.FindNewline(buffer, middlePoint, middlePoint, false);

                //a '\r' in the very last slot of the buffer is discarded when a candidate before the middle point exists
                //(presumably because its possible '\n' partner lies beyond the buffer - TODO confirm)
                if (newLinePosAfter == buffer.Length - 1 && buffer[newLinePosAfter] == (byte)'\r' && newLinePosBefore >= 0)
                {
                    newLinePosAfter = -1;
                }

                int closestNewLinePos = FindClosestToCenter(newLinePosBefore, newLinePosAfter, middlePoint);

                //middle point of the buffer corresponds to the reference file offset, so all we need to do is return the difference between the closest newline and the center of the buffer
                if (closestNewLinePos >= 0)
                {
                    return closestNewLinePos - middlePoint;
                }
            }

            //if we get this far, we were unable to find a record boundary within our limits => fail the upload
            //{0} is the search distance on each side (half the window); {3} is the whole window, i.e. the minimum size of a record with no boundary inside it
            throw new UploadFailedException(
                string.Format(
                    "Unable to locate a record boundary within {0}MB on either side of segment {1} (offset {2}). This means the record at that offset is larger than {3}MB.",
                    _maxAppendLength / 1024 / 1024 / 2,
                    segment.SegmentNumber,
                    segment.Offset,
                    _maxAppendLength / 1024 / 1024));
        }
 /// <summary>
 /// Extracts from the small test file contents the bytes that the given segment is expected to hold.
 /// </summary>
 /// <param name="segment">The segment whose number and length select the slice to copy.</param>
 /// <param name="metadata">The upload metadata supplying the segment length used to compute the start of the slice.</param>
 /// <returns>A new array of segment.Length bytes copied from the test file contents.</returns>
 private byte[] GetExpectedContents(UploadSegmentMetadata segment, UploadMetadata metadata)
 {
     var expected = new byte[segment.Length];

     // the slice starts at (segment number * metadata segment length) within the source contents
     var sourceOffset = segment.SegmentNumber * metadata.SegmentLength;
     Array.Copy(_smallFileContents, sourceOffset, expected, 0, segment.Length);

     return expected;
 }
        /// <summary>
        /// Asserts that the given segment is marked complete and that the stream stored in the front end
        /// exists, has the segment's length and holds the expected bytes.
        /// </summary>
        /// <param name="segmentMetadata">The segment whose status, path, length and number are checked.</param>
        /// <param name="metadata">The upload metadata, forwarded to GetExpectedContents.</param>
        /// <param name="frontEnd">The in-memory front end that is queried for the uploaded stream.</param>
        private void VerifyTargetStreamIsComplete(UploadSegmentMetadata segmentMetadata, UploadMetadata metadata, InMemoryFrontEnd frontEnd)
        {
            // status first: the segment must have been reported as complete
            Assert.Equal(SegmentUploadStatus.Complete, segmentMetadata.Status);

            // the stream must exist at the segment's path
            var wasUploaded = frontEnd.StreamExists(segmentMetadata.Path);
            var notUploadedMessage = string.Format("Segment {0} was not uploaded", segmentMetadata.SegmentNumber);
            Assert.True(wasUploaded, notUploadedMessage);

            // the stored stream must be exactly as long as the segment
            var expectedLength = segmentMetadata.Length;
            var uploadedLength = frontEnd.GetStreamLength(segmentMetadata.Path);
            Assert.Equal(expectedLength, uploadedLength);

            // finally compare the stored bytes against the expected slice of the test data
            var uploadedBytes = frontEnd.GetStreamContents(segmentMetadata.Path);
            var expectedBytes = GetExpectedContents(segmentMetadata, metadata);
            AssertExtensions.AreEqual(expectedBytes, uploadedBytes, "Segment {0} has unexpected contents", segmentMetadata.SegmentNumber);
        }