/// <summary>
/// Returns the size, in bytes, of the stream at the given path.
/// </summary>
/// <param name="streamPath">The relative path to the stream.</param>
/// <param name="isDownload">When <c>true</c>, the length is read from the local file system instead of being requested from the server.</param>
/// <returns>
/// The length of the stream, in bytes.
/// </returns>
/// <exception cref="System.Threading.Tasks.TaskCanceledException">The server request did not complete within the <c>PerRequestTimeoutMs</c> wait.</exception>
public long GetStreamLength(string streamPath, bool isDownload = false)
{
    // Local case: answer straight from the file system; no service call, no tracing.
    if (isDownload)
    {
        var localFile = new FileInfo(streamPath);
        return localFile.Length;
    }

    var timer = Stopwatch.StartNew();
    var statusRequest = _client.FileSystem.GetFileStatusAsync(_accountName, streamPath, cancellationToken: _token);

    // Bound the synchronous wait; a request that overruns is reported as a cancellation.
    bool finishedInTime = statusRequest.Wait(PerRequestTimeoutMs);
    if (!finishedInTime)
    {
        var message = string.Format("Get file status operation did not complete after {0} milliseconds.", PerRequestTimeoutMs);
        var timeoutError = new TaskCanceledException(message);
        TracingHelper.LogError(timeoutError);
        throw timeoutError;
    }

    var status = statusRequest.Result;
    timer.Stop();
    TracingHelper.LogInfo("Op:GETFILESTATUS,Path:{0},GetLength,TimeMs:{1}", streamPath, timer.ElapsedMilliseconds);

    return (long)status.FileStatus.Length;
}
/// <summary>
/// Asks the server whether the stream at the given path is a directory rather than a file.
/// This is used exclusively for download.
/// </summary>
/// <param name="streamPath">The relative path to the stream.</param>
/// <returns>
/// True if the server reports the stream's type as a directory, false otherwise
/// (including when no type is reported).
/// </returns>
/// <exception cref="System.Threading.Tasks.TaskCanceledException">The server request did not complete within the <c>PerRequestTimeoutMs</c> wait.</exception>
public bool IsDirectory(string streamPath)
{
    var timer = Stopwatch.StartNew();
    var statusRequest = _client.FileSystem.GetFileStatusAsync(_accountName, streamPath, cancellationToken: _token);

    // Bound the synchronous wait; a request that overruns is reported as a cancellation.
    bool finishedInTime = statusRequest.Wait(PerRequestTimeoutMs);
    if (!finishedInTime)
    {
        var message = string.Format("Get file status operation did not complete after {0} milliseconds.", PerRequestTimeoutMs);
        var timeoutError = new TaskCanceledException(message);
        TracingHelper.LogError(timeoutError);
        throw timeoutError;
    }

    var status = statusRequest.Result;
    timer.Stop();
    TracingHelper.LogInfo("Op:GETFILESTATUS,Path:{0},IsDirectory,TimeMs:{1}", streamPath, timer.ElapsedMilliseconds);

    // A missing type falls back to the enum default before the comparison.
    var reportedType = status.FileStatus.Type.GetValueOrDefault();
    return reportedType == FileType.DIRECTORY;
}
/// <summary>
/// Removes the stream at the given path, either from the local machine or from the server.
/// </summary>
/// <param name="streamPath">The relative path to the stream.</param>
/// <param name="recurse">When <c>true</c>, folder contents are deleted as well. Only meaningful for folder streams.</param>
/// <param name="isDownload">When <c>true</c>, the local directory or file at <paramref name="streamPath"/> is deleted instead of the server-side stream.</param>
/// <exception cref="System.Threading.Tasks.TaskCanceledException">The server request did not complete within the <c>PerRequestTimeoutMs</c> wait.</exception>
public void DeleteStream(string streamPath, bool recurse = false, bool isDownload = false)
{
    if (isDownload)
    {
        // Local case: a directory takes precedence over a file; a missing path is a no-op.
        if (Directory.Exists(streamPath))
        {
            Directory.Delete(streamPath, recurse);
            return;
        }

        if (File.Exists(streamPath))
        {
            File.Delete(streamPath);
        }

        return;
    }

    var timer = Stopwatch.StartNew();
    var deleteRequest = _client.FileSystem.DeleteAsync(_accountName, streamPath, recurse, cancellationToken: _token);

    // Bound the synchronous wait; a request that overruns is reported as a cancellation.
    bool finishedInTime = deleteRequest.Wait(PerRequestTimeoutMs);
    if (!finishedInTime)
    {
        var message = string.Format("Delete stream operation did not complete after {0} milliseconds.", PerRequestTimeoutMs);
        var timeoutError = new TaskCanceledException(message);
        TracingHelper.LogError(timeoutError);
        throw timeoutError;
    }

    deleteRequest.GetAwaiter().GetResult();
    timer.Stop();
    TracingHelper.LogInfo("Op:DELETE,Path:{0},recurse:{1},TimeMs:{2}", streamPath, recurse, timer.ElapsedMilliseconds);
}
/// <summary>
/// Combines the given input streams (in order) into the target stream on the server.
/// For downloads, no concatenation happens: the first input path is moved to the second input path.
/// </summary>
/// <param name="targetStreamPath">The relative path to the target stream. Not used when <paramref name="isDownload"/> is <c>true</c>.</param>
/// <param name="inputStreamPaths">An ordered array of paths to the input streams. Must contain exactly two paths when <paramref name="isDownload"/> is <c>true</c>.</param>
/// <param name="isDownload">When <c>true</c>, the operation is performed on the local machine instead of on the server.</param>
/// <exception cref="System.InvalidOperationException"><paramref name="isDownload"/> is <c>true</c> and <paramref name="inputStreamPaths"/> does not contain exactly two paths.</exception>
/// <exception cref="System.Threading.Tasks.TaskCanceledException">The server request did not complete within the <c>PerRequestTimeoutMs</c> wait.</exception>
public void Concatenate(string targetStreamPath, string[] inputStreamPaths, bool isDownload = false)
{
    if (isDownload)
    {
        // Download finalization is a local move from paths[0] to paths[1].
        int pathCount = inputStreamPaths.Length;
        if (pathCount != 2)
        {
            var countError = new InvalidOperationException(string.Format("Invalid list of stream paths for download finalization. Expected Paths: 2. Actual paths: {0}", pathCount));
            TracingHelper.LogError(countError);
            throw countError;
        }

        File.Move(inputStreamPaths[0], inputStreamPaths[1]);
        return;
    }

    // this is required for the current version of the microsoft concatenate
    // TODO: Improve WebHDFS concatenate to take in the list of paths to concatenate
    // in the request body.
    string requestBody = string.Concat("sources=", string.Join(",", inputStreamPaths));

    var timer = Stopwatch.StartNew();

    // For the current implementation, we require UTF8 encoding.
    byte[] encodedBody = Encoding.UTF8.GetBytes(requestBody);
    using (var bodyStream = new MemoryStream(encodedBody))
    {
        var concatRequest = _client.FileSystem.MsConcatAsync(_accountName, targetStreamPath, bodyStream, true, cancellationToken: _token);

        // Bound the synchronous wait; a request that overruns is reported as a cancellation.
        bool finishedInTime = concatRequest.Wait(PerRequestTimeoutMs);
        if (!finishedInTime)
        {
            var message = string.Format("Concatenate operation did not complete after {0} milliseconds.", PerRequestTimeoutMs);
            var timeoutError = new TaskCanceledException(message);
            TracingHelper.LogError(timeoutError);
            throw timeoutError;
        }

        concatRequest.GetAwaiter().GetResult();
        timer.Stop();
        TracingHelper.LogInfo("Op:MSCONCAT,Path:{0},NumberOfStreams:{1},TimeMs:{2}", targetStreamPath, inputStreamPaths.Length, timer.ElapsedMilliseconds);
    }
}
/// <summary>
/// Verifies the downloaded stream by comparing the length reported by the front end
/// for the segment path against the length recorded in the segment metadata.
/// The length lookup is retried up to <c>MaxBufferDownloadAttemptCount</c> times.
/// </summary>
/// <exception cref="TransferFailedException">The reported length differs from the expected segment length.</exception>
/// <exception cref="System.OperationCanceledException">Cancellation was requested on the transfer's token.</exception>
internal void VerifyDownloadedStream()
{
    // verify that the stream has the length we expected.
    var retryCount = 0;

    // -1 is a sentinel: if no lookup ever succeeds, the comparison below cannot match by accident.
    long remoteLength = -1;

    while (retryCount < MaxBufferDownloadAttemptCount)
    {
        _token.ThrowIfCancellationRequested();
        retryCount++;
        try
        {
            remoteLength = _frontEnd.GetStreamLength(_segmentMetadata.Path, _metadata.IsDownload);
            break;
        }
        catch (Exception e)
        {
            // Prefer reporting cancellation over whatever failure the lookup produced.
            _token.ThrowIfCancellationRequested();
            if (retryCount >= MaxBufferDownloadAttemptCount)
            {
                TracingHelper.LogError(e);

                // Rethrow without resetting the stack trace of the original failure.
                throw;
            }

            var waitTime = WaitForRetry(retryCount, this.UseBackOffRetryStrategy, _token);
            TracingHelper.LogInfo("VerifyDownloadedStream: GetStreamLength at path:{0} failed on try: {1} with exception: {2}. Wait time in ms before retry: {3}", _segmentMetadata.Path, retryCount, e, waitTime);
        }
    }

    if (_segmentMetadata.Length != remoteLength)
    {
        var ex = new TransferFailedException(string.Format("Post-download stream verification failed: target stream has a length of {0}, expected {1}", remoteLength, _segmentMetadata.Length));
        TracingHelper.LogError(ex);
        throw ex;
    }
}
/// <summary>
/// Creates a stream at the given path on the server, optionally seeding it with initial content.
/// </summary>
/// <param name="streamPath">The relative path to the stream.</param>
/// <param name="overwrite">Whether to overwrite an existing stream.</param>
/// <param name="data">The initial content, or <c>null</c> to create the stream with an empty body.</param>
/// <param name="byteCount">The number of bytes of <paramref name="data"/>, counted from its start, to upload. Ignored when <paramref name="data"/> is <c>null</c>.</param>
/// <exception cref="System.Threading.Tasks.TaskCanceledException">The server request did not complete within the <c>PerRequestTimeoutMs</c> wait.</exception>
public void CreateStream(string streamPath, bool overwrite, byte[] data, int byteCount)
{
    var timer = Stopwatch.StartNew();

    // No data means an empty request body; otherwise expose only the first byteCount bytes.
    MemoryStream initialContent;
    if (data == null)
    {
        initialContent = new MemoryStream();
    }
    else
    {
        initialContent = new MemoryStream(data, 0, byteCount);
    }

    using (initialContent)
    {
        var createRequest = _client.FileSystem.CreateAsync(_accountName, streamPath, initialContent, overwrite: overwrite, cancellationToken: _token);

        // Bound the synchronous wait; a request that overruns is reported as a cancellation.
        bool finishedInTime = createRequest.Wait(PerRequestTimeoutMs);
        if (!finishedInTime)
        {
            var message = string.Format("Create stream operation did not complete after {0} milliseconds.", PerRequestTimeoutMs);
            var timeoutError = new TaskCanceledException(message);
            TracingHelper.LogError(timeoutError);
            throw timeoutError;
        }

        createRequest.GetAwaiter().GetResult();
    }

    timer.Stop();
    TracingHelper.LogInfo("Op:CREATE,Path:{0},Overwrite:{1},TimeMs:{2}", streamPath, overwrite, timer.ElapsedMilliseconds);
}
/// <summary>
/// Opens the stream at the given path for reading, starting at the given offset.
/// </summary>
/// <param name="streamPath">The relative path to the stream.</param>
/// <param name="offset">The offset, in bytes, at which reading starts.</param>
/// <param name="length">The number of bytes requested from the server. Not used for local files, where reading simply stops at the end of the file.</param>
/// <param name="isDownload">When <c>true</c>, the stream is opened on the server (the source of a download); otherwise the local file at <paramref name="streamPath"/> is opened.</param>
/// <returns>
/// A readable stream positioned at <paramref name="offset"/>. The caller is responsible for disposing it.
/// </returns>
/// <exception cref="System.Threading.Tasks.TaskCanceledException">The server request did not complete within the <c>PerRequestTimeoutMs</c> wait.</exception>
/// <exception cref="System.ArgumentException">The local file is being read and <paramref name="offset"/> is at or beyond its end.</exception>
public Stream ReadStream(string streamPath, long offset, long length, bool isDownload = false)
{
    if (isDownload)
    {
        var stopWatch = new Stopwatch();
        stopWatch.Start();
        var task = _client.FileSystem.OpenWithHttpMessagesAsync(_accountName, streamPath, length, offset, cancellationToken: _token);
        if (!task.Wait(PerRequestTimeoutMs))
        {
            // The task has not completed here, so there is no response to take a request id from.
            // Reading task.Result at this point would block until the request finishes (defeating
            // the timeout) or throw a different exception, so the message carries no trace id.
            var ex = new TaskCanceledException(string.Format("Reading stream operation did not complete after {0} milliseconds.", PerRequestTimeoutMs));
            TracingHelper.LogError(ex);
            throw ex;
        }

        Stream toReturn = task.GetAwaiter().GetResult().Body;
        stopWatch.Stop();
        TracingHelper.LogInfo("Op:READ,Path:{0},offset:{1},length:{2},TimeMs:{3}", streamPath, offset, length, stopWatch.ElapsedMilliseconds);
        return toReturn;
    }

    // note that length is not used here since we will automatically stop reading once we reach the end of the stream.
    var stream = new FileStream(streamPath, FileMode.Open, FileAccess.Read, FileShare.Read);
    try
    {
        if (offset >= stream.Length)
        {
            var ex = new ArgumentException("StartOffset is beyond the end of the input file", "StartOffset");
            TracingHelper.LogError(ex);
            throw ex;
        }

        stream.Seek(offset, SeekOrigin.Begin);
        return stream;
    }
    catch
    {
        // Ownership only passes to the caller on success; release the file handle on any failure.
        stream.Dispose();
        throw;
    }
}
/// <summary>
/// Appends a block of bytes to an existing stream on the server.
/// </summary>
/// <param name="streamPath">The relative path to the stream.</param>
/// <param name="data">The buffer holding the bytes to append.</param>
/// <param name="offset">The offset passed to the server's append operation.</param>
/// <param name="byteCount">The number of bytes of <paramref name="data"/>, counted from its start, to append.</param>
/// <exception cref="System.Threading.Tasks.TaskCanceledException">The server request did not complete within the <c>PerRequestTimeoutMs</c> wait.</exception>
public void AppendToStream(string streamPath, byte[] data, long offset, int byteCount)
{
    var timer = Stopwatch.StartNew();

    // Expose only the first byteCount bytes of the buffer as the request body.
    using (var payload = new MemoryStream(data, 0, byteCount))
    {
        var appendRequest = _client.FileSystem.AppendAsync(_accountName, streamPath, payload, offset, cancellationToken: _token);

        // Bound the synchronous wait; a request that overruns is reported as a cancellation.
        bool finishedInTime = appendRequest.Wait(PerRequestTimeoutMs);
        if (!finishedInTime)
        {
            var message = string.Format("Append to stream operation did not complete after {0} milliseconds.", PerRequestTimeoutMs);
            var timeoutError = new TaskCanceledException(message);
            TracingHelper.LogError(timeoutError);
            throw timeoutError;
        }

        appendRequest.GetAwaiter().GetResult();
    }

    timer.Stop();
    TracingHelper.LogInfo("Op:APPEND,Path:{0},offset:{1},TimeMs:{2}", streamPath, offset, timer.ElapsedMilliseconds);
}
/// <summary>
/// Determines if the stream with given path exists.
/// </summary>
/// <param name="streamPath">The relative path to the stream.</param>
/// <param name="isDownload">if set to <c>true</c> [is download], meaning we will test if the stream exists on the local machine instead of on the server.</param>
/// <returns>
/// True if the stream exists, false otherwise. On the server, "does not exist" means the file status
/// request failed with an HTTP 404 response or a remote file-not-found exception.
/// </returns>
/// <exception cref="System.Threading.Tasks.TaskCanceledException">The server request did not complete within the <c>PerRequestTimeoutMs</c> wait.</exception>
public bool StreamExists(string streamPath, bool isDownload = false)
{
    if (isDownload)
    {
        return File.Exists(streamPath) || Directory.Exists(streamPath);
    }

    var stopWatch = new Stopwatch();
    stopWatch.Start();
    try
    {
        var task = _client.FileSystem.GetFileStatusAsync(_accountName, streamPath, cancellationToken: _token);
        if (!task.Wait(PerRequestTimeoutMs))
        {
            var ex = new TaskCanceledException(
                string.Format("Get file status operation did not complete after {0} milliseconds.", PerRequestTimeoutMs));
            TracingHelper.LogError(ex);
            throw ex;
        }

        task.GetAwaiter().GetResult();
    }
    catch (AggregateException ex)
    {
        // Only an aggregate wrapping exactly one service error can mean "not found".
        // The 'as' cast yields null for any other inner exception type; the helper treats
        // null as "not a not-found error" instead of dereferencing it.
        if (ex.InnerExceptions != null
            && ex.InnerExceptions.Count == 1
            && IsStreamNotFoundError(ex.InnerExceptions[0] as AdlsErrorException))
        {
            return false;
        }

        TracingHelper.LogError(ex);

        // Rethrow without resetting the stack trace of the original failure.
        throw;
    }
    catch (AdlsErrorException cloudEx)
    {
        if (IsStreamNotFoundError(cloudEx))
        {
            return false;
        }

        TracingHelper.LogError(cloudEx);
        throw;
    }
    finally
    {
        // Timing is traced on every path: success, not-found and failure.
        stopWatch.Stop();
        TracingHelper.LogInfo("Op:GETFILESTATUS,Path:{0},TestStream,TimeMs:{1}", streamPath, stopWatch.ElapsedMilliseconds);
    }

    return true;
}

/// <summary>
/// Tells whether a service error means the requested stream does not exist.
/// </summary>
/// <param name="cloudEx">The service error to inspect; may be <c>null</c>.</param>
/// <returns>
/// True if the error carries an HTTP 404 response or a remote file-not-found exception; false otherwise, including for <c>null</c>.
/// </returns>
private static bool IsStreamNotFoundError(AdlsErrorException cloudEx)
{
    if (cloudEx == null)
    {
        return false;
    }

    if (cloudEx.Response != null && cloudEx.Response.StatusCode == HttpStatusCode.NotFound)
    {
        return true;
    }

    return cloudEx.Body != null && cloudEx.Body.RemoteException is AdlsFileNotFoundException;
}
/// <summary>
/// Downloads the segment contents into the local target file, one buffer-sized request at a time.
/// Each request is retried on failure, and a request that returns fewer bytes than asked for is
/// followed by a request for only the missing remainder.
/// </summary>
/// <exception cref="TransferFailedException">
/// A request kept returning partial data, or the number of bytes written for the segment
/// does not match the segment length.
/// </exception>
/// <exception cref="System.OperationCanceledException">Cancellation was requested on the transfer's token.</exception>
private void DownloadSegmentContents()
{
    // set the current offset in the stream we are reading to the offset
    // that this segment starts at.
    long curOffset = _segmentMetadata.Offset;

    // set the offset of the local file that we are creating to the beginning of the local stream.
    // this value will be used to ensure that we are always reporting the right progress and that,
    // in the event of failure, we reset the local stream to the proper location.
    long localOffset = 0;

    // determine the number of requests made based on length of file divided by 32MB max size requests
    var numRequests = Math.Ceiling(_segmentMetadata.Length / BufferLength);

    // set the length remaining to ensure that only the exact number of bytes is ultimately downloaded
    // for this segment.
    var lengthRemaining = _segmentMetadata.Length;

    // for multi-segment files we append "inprogress" to indicate that the file is not yet ready for use.
    // This also protects the user from unintentionally using the file after a failed download.
    var streamName = _metadata.SegmentCount > 1 ? string.Format("{0}.inprogress", _metadata.TargetStreamPath) : _metadata.TargetStreamPath;

    // FileMode.Open: the local file must already exist; this method only writes into it.
    using (var outputStream = new FileStream(streamName, FileMode.Open, FileAccess.Write, FileShare.ReadWrite))
    {
        // The segment is written at the same offset in the local file as it has in the source.
        outputStream.Seek(curOffset, SeekOrigin.Begin);
        for (int i = 0; i < numRequests; i++)
        {
            _token.ThrowIfCancellationRequested();

            // Per-buffer retry state.
            int attemptCount = 0;
            int partialDataAttempts = 0;
            bool downloadCompleted = false;
            long dataReceived = 0;
            bool modifyLengthAndOffset = false;

            while (!downloadCompleted && attemptCount < MaxBufferDownloadAttemptCount)
            {
                _token.ThrowIfCancellationRequested();
                try
                {
                    long lengthToDownload = (long)BufferLength;

                    // in the case where we got less than the expected amount of data,
                    // only download the rest of the data from the previous request
                    // instead of a new full buffer.
                    if (modifyLengthAndOffset)
                    {
                        lengthToDownload -= dataReceived;
                    }

                    // test to make sure that the remaining length is larger than the max size,
                    // otherwise just download the remaining length.
                    if (lengthRemaining - lengthToDownload < 0)
                    {
                        lengthToDownload = lengthRemaining;
                    }

                    using (var readStream = _frontEnd.ReadStream(_metadata.InputFilePath, curOffset, lengthToDownload, _metadata.IsDownload))
                    {
                        readStream.CopyTo(outputStream, (int)lengthToDownload);
                    }

                    // The output position advanced by exactly the number of bytes that were copied.
                    var lengthReturned = outputStream.Position - curOffset;

                    // if we got more data than we asked for something went wrong and we should retry, since we can't trust the extra data
                    if (lengthReturned > lengthToDownload)
                    {
                        var ex = new TransferFailedException(string.Format("{4}: Did not download the expected amount of data in the request. Expected: {0}. Actual: {1}. From offset: {2} in remote file: {3}", lengthToDownload, outputStream.Position - curOffset, curOffset, _metadata.InputFilePath, DateTime.Now.ToString()));
                        TracingHelper.LogError(ex);
                        throw ex;
                    }

                    // we need to validate how many bytes have actually been copied to the read stream
                    if (lengthReturned < lengthToDownload)
                    {
                        // Keep the bytes we did receive and ask only for the remainder next time.
                        partialDataAttempts++;
                        lengthRemaining -= lengthReturned;
                        curOffset += lengthReturned;
                        localOffset += lengthReturned;
                        modifyLengthAndOffset = true;
                        dataReceived += lengthReturned;
                        ReportProgress(localOffset, false);

                        // we will wait before the next iteration, since something went wrong and we did not receive enough data.
                        // this could be a throttling issue or an issue with the service itself. Either way, waiting should help
                        // reduce the likelihood of additional failures.
                        if (partialDataAttempts >= MaxBufferDownloadAttemptCount)
                        {
                            var ex = new TransferFailedException(string.Format("Failed to retrieve the requested data after {0} attempts for file {1}. This usually indicates repeated server-side throttling due to exceeding account bandwidth.", MaxBufferDownloadAttemptCount, _segmentMetadata.Path));
                            TracingHelper.LogError(ex);
                            throw ex;
                        }

                        var waitTime = WaitForRetry(partialDataAttempts, this.UseBackOffRetryStrategy, _token);
                        TracingHelper.LogInfo("DownloadSegmentContents: ReadStream at path:{0} returned: {1} bytes. Expected: {2} bytes. Attempt: {3}. Wait time in ms before retry: {4}", _metadata.InputFilePath, lengthReturned, lengthToDownload, partialDataAttempts, waitTime);
                    }
                    else
                    {
                        downloadCompleted = true;
                        lengthRemaining -= lengthToDownload;
                        curOffset += lengthToDownload;
                        localOffset += lengthToDownload;
                        ReportProgress(localOffset, false);
                    }
                }
                catch (Exception ex)
                {
                    // update counts and reset for internal attempts
                    attemptCount++;
                    partialDataAttempts = 0;

                    //if we tried more than the number of times we were allowed to, give up and throw the exception
                    if (attemptCount >= MaxBufferDownloadAttemptCount)
                    {
                        ReportProgress(localOffset, true);
                        TracingHelper.LogError(ex);

                        // Rethrow without resetting the stack trace of the original failure.
                        throw;
                    }
                    else
                    {
                        var waitTime = WaitForRetry(attemptCount, this.UseBackOffRetryStrategy, _token);
                        TracingHelper.LogInfo("DownloadSegmentContents: ReadStream at path:{0} failed on try: {1} with exception: {2}. Wait time in ms before retry: {3}", _metadata.InputFilePath, attemptCount, ex, waitTime);

                        // forcibly put the stream back to where it should be based on where we think we are in the download.
                        outputStream.Seek(curOffset, SeekOrigin.Begin);
                    }
                }
            }
        }

        // full validation of the segment.
        if (outputStream.Position - _segmentMetadata.Offset != _segmentMetadata.Length)
        {
            var ex = new TransferFailedException(string.Format("Post-download stream segment verification failed for file {2}: target stream has a length of {0}, expected {1}. This usually indicates repeated server-side throttling due to exceeding account bandwidth.", outputStream.Position - _segmentMetadata.Offset, _segmentMetadata.Length, _segmentMetadata.Path));
            TracingHelper.LogError(ex);
            throw ex;
        }
    }
}
/// <summary>
/// Uploads the buffer to the segment's stream, retrying on failure up to
/// <c>MaxBufferUploadAttemptCount</c> times.
/// </summary>
/// <param name="buffer">The buffer holding the bytes to upload.</param>
/// <param name="bytesToCopy">The number of bytes of <paramref name="buffer"/> to upload.</param>
/// <param name="targetStreamOffset">The offset in the target stream to upload at. An offset of 0 creates (overwriting) the stream; any other offset appends to it.</param>
/// <returns>The target stream offset immediately after the uploaded bytes.</returns>
/// <exception cref="System.OperationCanceledException">Cancellation was requested on the transfer's token.</exception>
private long UploadBuffer(byte[] buffer, int bytesToCopy, long targetStreamOffset)
{
    //append it to the remote stream
    int attemptCount = 0;
    bool uploadCompleted = false;
    while (!uploadCompleted && attemptCount < MaxBufferUploadAttemptCount)
    {
        _token.ThrowIfCancellationRequested();
        attemptCount++;
        try
        {
            if (targetStreamOffset == 0)
            {
                _frontEnd.CreateStream(_segmentMetadata.Path, true, buffer, bytesToCopy);
            }
            else
            {
                _frontEnd.AppendToStream(_segmentMetadata.Path, buffer, targetStreamOffset, bytesToCopy);
            }

            uploadCompleted = true;
            targetStreamOffset += bytesToCopy;
            ReportProgress(targetStreamOffset, false);
        }
        catch (Exception ex)
        {
            if (IsBadOffsetError(ex))
            {
                // this means we tried to re-upload at the same location and the upload actually succeeded, which means we should move on.
                uploadCompleted = true;
                targetStreamOffset += bytesToCopy;
                ReportProgress(targetStreamOffset, false);
            }
            else if (attemptCount >= MaxBufferUploadAttemptCount)
            {
                //if we tried more than the number of times we were allowed to, give up and throw the exception
                ReportProgress(targetStreamOffset, true);
                TracingHelper.LogError(ex);

                // Rethrow without resetting the stack trace of the original failure.
                throw;
            }
            else
            {
                var waitTime = WaitForRetry(attemptCount, this.UseBackOffRetryStrategy, _token);
                TracingHelper.LogInfo("{0} at path:{1} failed on try: {2} with exception: {3}. Wait time in ms before retry: {4}", targetStreamOffset == 0 ? "CREATE" : "APPEND", _segmentMetadata.Path, attemptCount, ex, waitTime);
            }
        }
    }

    return targetStreamOffset;
}

/// <summary>
/// Tells whether an upload failure is the service's bad-offset error, either thrown directly
/// or wrapped as the single inner exception of an <see cref="AggregateException"/>.
/// </summary>
/// <param name="error">The exception caught while uploading.</param>
/// <returns>
/// True if the failure carries a remote bad-offset exception; false otherwise, including when the
/// service error has no body.
/// </returns>
private static bool IsBadOffsetError(Exception error)
{
    var serviceError = error as AdlsErrorException;
    if (serviceError == null)
    {
        var aggregate = error as AggregateException;
        if (aggregate != null && aggregate.InnerExceptions.Count == 1)
        {
            serviceError = aggregate.InnerException as AdlsErrorException;
        }
    }

    // A service error without a body cannot be classified; treat it as an ordinary failure
    // so it goes through the normal retry path instead of raising a NullReferenceException.
    return serviceError != null
        && serviceError.Body != null
        && serviceError.Body.RemoteException is AdlsBadOffsetException;
}