/// <summary>
/// Re-writes the blobs from the source to the destination after passing them through the list of sanitizers.
/// Blobs are fetched in batches of <c>_maxBatchToProcess</c>; the loop ends after the first batch that is not
/// full, when cancellation is requested, or as soon as any step throws.
/// </summary>
/// <param name="token">A cancellation token for the async operation.</param>
/// <param name="blobPrefix">A blob prefix to filter the blobs that will be processed. Use null if no filter needed.</param>
/// <returns>The awaitable async operation.</returns>
/// <remarks>
/// Exceptions are logged at critical level with <c>LogEvents.JobRunFailed</c> and are not rethrown to the caller.
/// </remarks>
public async Task ProcessAsync(CancellationToken token, string blobPrefix = null)
{
    bool continueProcessing = true;
    try
    {
        while (continueProcessing && !token.IsCancellationRequested)
        {
            var blobs = (await _source.GetFilesAsync(_maxBatchToProcess, token, blobPrefix)).ToArray();

            // A short batch means the source has nothing more to hand out after this one.
            continueProcessing = blobs.Length == _maxBatchToProcess;

            var batch = Task.WhenAll(blobs.Select(blob => ProcessBlobAsync(blob, token)));
            try
            {
                await batch;
            }
            catch (Exception)
            {
                // Awaiting the combined task surfaces only the FIRST worker failure. The task itself
                // carries all of them, so rethrow the full (flattened) set to let the handler below log
                // every failed blob. Exception is null when the batch was cancelled rather than faulted.
                if (batch.Exception == null)
                {
                    throw;
                }

                throw batch.Exception.Flatten();
            }
        }
    }
    catch (AggregateException aggregateExceptions)
    {
        foreach (var innerEx in aggregateExceptions.InnerExceptions)
        {
            _logger.LogCritical(LogEvents.JobRunFailed, innerEx, "ProcessAsync: An exception was encountered.");
        }
    }
    catch (Exception exception)
    {
        _logger.LogCritical(LogEvents.JobRunFailed, exception, "ProcessAsync: An exception was encountered.");
    }
}
/// <summary>
/// Try to process the files from the source.
/// After processing the file is cleaned. This means it will be moved either to an archive or a deadletter container.
/// </summary>
/// <param name="maxFileCount">At most this number of files will be processed at once.</param>
/// <param name="fileNameTransform">A Func to be used to generate the output file name from the input file name.</param>
/// <param name="sourceContentType">The <see cref="Stats.AzureCdnLogs.Common.Collect.ContentType"/> for the source file.</param>
/// <param name="destinationContentType">The <see cref="Stats.AzureCdnLogs.Common.Collect.ContentType"/> for the destination file.</param>
/// <param name="token">A <see cref="System.Threading.CancellationToken"/> to be used for cancelling the operation.</param>
/// <returns>An <see cref="System.AggregateException"/> wrapping the exceptions that were collected, or null if there were none.</returns>
public virtual async Task<AggregateException> TryProcessAsync(int maxFileCount, Func<string, string> fileNameTransform, ContentType sourceContentType, ContentType destinationContentType, CancellationToken token)
{
    ConcurrentBag<Exception> exceptions = new ConcurrentBag<Exception>();
    try
    {
        var files = await _source.GetFilesAsync(maxFileCount, token);

        // NOTE(review): the loop body blocks worker threads on async calls (.Result / .Wait()).
        // Consider moving the per-file work into an awaited helper combined with Task.WhenAll.
        Parallel.ForEach(files, (file) =>
        {
            if (token.IsCancellationRequested)
            {
                return;
            }

            var lockResult = _source.TakeLockAsync(file, token).Result;
            if (lockResult.Item1 /*lockResult*/)
            {
                using (var inputStream = _source.OpenReadAsync(file, sourceContentType, token).Result)
                {
                    // The chain runs verify -> write -> clean -> release lock in that order. Each later
                    // step records the failure of the step before it and then still runs. Blocking here
                    // keeps inputStream alive until the whole chain has finished.
                    VerifyStreamInternalAsync(file, sourceContentType, token).
                        ContinueWith(t =>
                        {
                            // If validation failed, fault this step so the file is cleaned as an error
                            // instead of being picked up and processed over and over.
                            if (!t.Result)
                            {
                                throw new ApplicationException($"File {file} failed validation.");
                            }
                            _destination.WriteAsync(inputStream, ProcessLogStream, fileNameTransform(file.Segments.Last()), destinationContentType, token).Wait();
                        }).
                        ContinueWith(t =>
                        {
                            AddException(exceptions, t.Exception);
                            return _source.CleanAsync(file, onError: t.IsFaulted, token: token).Result;
                        }).
                        ContinueWith(t =>
                        {
                            AddException(exceptions, t.Exception);
                            return _source.ReleaseLockAsync(file, token).Result;
                        }).
                        ContinueWith(t =>
                        {
                            AddException(exceptions, t.Exception);
                            return t.Result;
                        }).
                        Wait();
                }
            }

            // Log any exception from the renew-lease task if it has already faulted.
            // If the task is still running at this moment any future failure would not matter.
            if (lockResult.Item2 != null && lockResult.Item2.IsFaulted)
            {
                AddException(exceptions, lockResult.Item2.Exception);
            }
        });
    }
    catch (Exception e)
    {
        AddException(exceptions, e);
    }

    return exceptions.IsEmpty ? null : new AggregateException(exceptions.ToArray());
}
/// <summary>
/// Attempts to process a batch of files from the source.
/// Once a file has been processed it is cleaned, meaning it is moved either to an archive or to a deadletter container.
/// </summary>
/// <param name="maxFileCount">Upper bound on the number of files requested from the source for this run.</param>
/// <param name="fileNameTransform">Maps an input file name to the output file name.</param>
/// <param name="sourceContentType">The <see cref="Stats.AzureCdnLogs.Common.Collect.ContentType"/> of the source file.</param>
/// <param name="destinationContentType">The <see cref="Stats.AzureCdnLogs.Common.Collect.ContentType"/> of the destination file.</param>
/// <param name="token">A <see cref="System.Threading.CancellationToken"/> used to cancel the operation.</param>
/// <returns>An <see cref="System.AggregateException"/> wrapping the exceptions collected by the workers, or null if none were collected.</returns>
public virtual async Task<AggregateException> TryProcessAsync(int maxFileCount, Func<string, string> fileNameTransform, ContentType sourceContentType, ContentType destinationContentType, CancellationToken token)
{
    // Shared, thread-safe sink handed to every per-file worker.
    var failures = new ConcurrentBag<Exception>();

    var pendingFiles = (await _source.GetFilesAsync(maxFileCount, token)).ToArray();

    // One worker per file; all of them are awaited together.
    var fileTasks = pendingFiles.Select(
        file => TryProcessBlobAsync(file, fileNameTransform, sourceContentType, destinationContentType, failures, token));
    await Task.WhenAll(fileTasks);

    if (failures.IsEmpty)
    {
        return null;
    }

    return new AggregateException(failures.ToArray());
}