/// <summary>
/// Announces a single link on the console and, when <paramref name="actuallyWrite"/> is set,
/// asks <paramref name="awsManager"/> to create it.
/// </summary>
/// <param name="awsManager">Manager used to create the link.</param>
/// <param name="createLinkRequest">Describes the link (collection, relative path, file fingerprint).</param>
/// <param name="actuallyWrite">When <c>false</c>, the link is only logged (dry run).</param>
/// <param name="cancellationToken">Checked up front and passed on to the manager.</param>
/// <remarks>
/// Nothing is rethrown: an <see cref="OperationCanceledException"/> is dropped silently and any
/// other exception is reported on the console.
/// </remarks>
static async Task CreateLinkAsync(IAwsManager awsManager, S3Links.ICreateLinkRequest createLinkRequest, bool actuallyWrite, CancellationToken cancellationToken)
{
    if (cancellationToken.IsCancellationRequested)
    {
        return;
    }

    var relativePath = createLinkRequest.RelativePath;
    var fileFingerprint = createLinkRequest.FileFingerprint;
    var key = fileFingerprint.Fingerprint.Key();
    var collection = createLinkRequest.Collection;

    // Only the first 12 characters of the key are shown to keep the log readable.
    var shortKey = key.Substring(0, 12);
    var origin = fileFingerprint.WasCached ? "cached" : "new";

    Console.WriteLine("Link {0} \"{1}\" -> {2} ({3})", collection, relativePath, shortKey, origin);

    if (actuallyWrite)
    {
        try
        {
            await awsManager.CreateLinkAsync(createLinkRequest, cancellationToken).ConfigureAwait(false);
        }
        catch (Exception failure)
        {
            // Cancellation is not reported as a failure.
            if (failure is OperationCanceledException)
            {
                return;
            }

            Console.WriteLine("Link {0} {1} -> {2} failed: {3}", collection, relativePath, shortKey, failure.Message);
        }
    }
}
/// <summary>
/// Announces a single blob upload on the console and, when the S3 settings allow writing,
/// asks <paramref name="awsManager"/> to perform it.
/// </summary>
/// <param name="awsManager">Manager used to upload the blob.</param>
/// <param name="uploadBlobRequest">The upload to perform; a <c>null</c> request is ignored.</param>
/// <param name="cancellationToken">Checked up front and passed on to the manager.</param>
/// <remarks>
/// Nothing is rethrown. Failures are reported on the console. A cancellation that was actually
/// requested through <paramref name="cancellationToken"/> is not reported as a failure.
/// </remarks>
async Task UploadBlobAsync(IAwsManager awsManager, S3Blobs.IUploadBlobRequest uploadBlobRequest, CancellationToken cancellationToken)
{
    if (null == uploadBlobRequest)
    {
        return;
    }

    // Do not announce or start new uploads once cancellation has been requested.
    if (cancellationToken.IsCancellationRequested)
    {
        return;
    }

    var fileFingerprint = uploadBlobRequest.FileFingerprint;
    var fullFilePath = fileFingerprint.FullFilePath;

    // Only the first 12 characters of the key are shown to keep the log readable.
    Console.WriteLine("Upload {0} as {1}", fullFilePath, fileFingerprint.Fingerprint.Key().Substring(0, 12));

    if (!_s3Settings.ActuallyWrite)
    {
        return;
    }

    try
    {
        await awsManager.UploadBlobAsync(uploadBlobRequest, cancellationToken).ConfigureAwait(false);
    }
    catch (OperationCanceledException) when (cancellationToken.IsCancellationRequested)
    {
        // The caller asked us to stop; this is not an upload failure. The filter keeps
        // cancellations that did NOT come from our token (e.g. timeouts surfaced as
        // cancellations) flowing into the general handler below so they are still reported.
    }
    catch (Exception ex)
    {
        Console.WriteLine("Upload of {0} failed: {1}", fullFilePath, ex.Message);
    }
}
/// <summary>
/// Routes every incoming blob to a per-collection link pipeline and waits for all of those
/// pipelines to finish.
/// </summary>
/// <param name="awsManager">Manager handed to each per-collection pipeline.</param>
/// <param name="blobSourceBlock">Source of (path, fingerprint) pairs; its completion ends the routing.</param>
/// <param name="actuallyWrite">Passed through to the per-collection pipelines.</param>
/// <param name="cancellationToken">Used when sending to collection buffers and passed to the pipelines.</param>
/// <returns>A task that completes when the router and every per-collection pipeline are done.</returns>
/// <remarks>
/// Blobs whose collection name is null or empty are dropped. A pipeline is created lazily the
/// first time a collection name is seen.
/// </remarks>
public async Task CreateLinksAsync(IAwsManager awsManager, ISourceBlock<Tuple<AnnotatedPath, FileFingerprint>> blobSourceBlock, bool actuallyWrite, CancellationToken cancellationToken)
{
    var collectionBlocks = new Dictionary<string, ITargetBlock<Tuple<AnnotatedPath, FileFingerprint>>>();
    var tasks = new List<Task>();

    // The route block is created with default execution options (documented default: one
    // message at a time), so the plain Dictionary/List above are only touched by one
    // invocation of this delegate at any moment.
    var routeBlock = new ActionBlock<Tuple<AnnotatedPath, FileFingerprint>>(
        async blob =>
        {
            var collection = blob.Item1.Collection;

            if (string.IsNullOrEmpty(collection))
            {
                return;
            }

            if (!collectionBlocks.TryGetValue(collection, out var collectionBlock))
            {
                var bufferBlock = new BufferBlock<Tuple<AnnotatedPath, FileFingerprint>>();

                collectionBlock = bufferBlock;
                collectionBlocks[collection] = collectionBlock;

                var task = CreateLinksBlockAsync(awsManager, collection, bufferBlock, actuallyWrite, cancellationToken);

                tasks.Add(task);
            }

            await collectionBlock.SendAsync(blob, cancellationToken).ConfigureAwait(false);
        });

    blobSourceBlock.LinkTo(routeBlock, new DataflowLinkOptions { PropagateCompletion = true });

    try
    {
        await routeBlock.Completion.ConfigureAwait(false);

        Debug.WriteLine("S3LinkCreateor.CreateLinkAsync() routeBlock is done");
    }
    finally
    {
        // Always signal completion to the per-collection buffers, even when routing faulted
        // or was cancelled. Otherwise their pipelines never see the end of input and the
        // tasks started above could wait forever.
        foreach (var block in collectionBlocks.Values)
        {
            block.Complete();
        }
    }

    await Task.WhenAll(tasks).ConfigureAwait(false);

    Debug.WriteLine("S3LinkCreateor.CreateLinkAsync() all link blocks are done");
}
/// <summary>
/// Runs link creation for all blobs in <paramref name="linkBlobs"/> through the link manager,
/// using the write flag from the S3 settings.
/// </summary>
/// <param name="awsManager">Manager passed on to the link manager.</param>
/// <param name="linkBlobs">Source of (path, fingerprint) pairs to link.</param>
/// <param name="cancellationToken">Passed on to the link manager.</param>
/// <remarks>
/// Best effort: any exception is written to the debug output and not rethrown.
/// </remarks>
public async Task UpdateLinksAsync(IAwsManager awsManager, ISourceBlock<Tuple<AnnotatedPath, FileFingerprint>> linkBlobs, CancellationToken cancellationToken)
{
    try
    {
        var actuallyWrite = _s3Settings.ActuallyWrite;
        var linksTask = _linkManager.CreateLinksAsync(awsManager, linkBlobs, actuallyWrite, cancellationToken);

        await linksTask.ConfigureAwait(false);

        Debug.WriteLine("Done processing links");
    }
    catch (Exception linkFailure)
    {
        Debug.WriteLine("Processing links failed: " + linkFailure.Message);
    }
}
/// <summary>
/// Builds and runs the link pipeline for one collection: fetches the collection's existing
/// links, turns each incoming (path, fingerprint) pair into a link request, and creates the
/// links that need creating.
/// </summary>
/// <param name="awsManager">Manager used to list existing links, build requests and create links.</param>
/// <param name="collection">The collection this pipeline is responsible for.</param>
/// <param name="collectionBlock">Source of the pairs belonging to <paramref name="collection"/>.</param>
/// <param name="actuallyWrite">Passed through to <c>CreateLinkAsync</c>.</param>
/// <param name="cancellationToken">Applied to both dataflow blocks and to the manager calls.</param>
/// <returns>A task that completes when the final (create) block has completed.</returns>
/// <remarks>
/// The transform step throws <see cref="InvalidOperationException"/> for a pair that belongs
/// to a different collection, or whose relative path starts with <c>..</c>, starts with
/// <c>file:</c>, or (after normalising backslashes to forward slashes) starts with <c>/</c>.
/// Requests for which the manager returns <c>null</c> are discarded.
/// </remarks>
static async Task CreateLinksBlockAsync(IAwsManager awsManager, string collection, ISourceBlock<Tuple<AnnotatedPath, FileFingerprint>> collectionBlock, bool actuallyWrite, CancellationToken cancellationToken)
{
    var links = await awsManager.GetLinksAsync(collection, cancellationToken).ConfigureAwait(false);

    Debug.WriteLine($"Link handler for {collection} found {links.Count} existing links");

    var createLinkBlock = new ActionBlock<S3Links.ICreateLinkRequest>(
        link => CreateLinkAsync(awsManager, link, actuallyWrite, cancellationToken),
        new ExecutionDataflowBlockOptions
        {
            MaxDegreeOfParallelism = 512,
            CancellationToken = cancellationToken
        });

    var makeLinkBlock = new TransformBlock<Tuple<AnnotatedPath, FileFingerprint>, S3Links.ICreateLinkRequest>(
        tuple =>
        {
            var path = tuple.Item1;
            var file = tuple.Item2;

            if (collection != path.Collection)
            {
                throw new InvalidOperationException($"Create link for {path.Collection} on {collection}");
            }

            var relativePath = path.RelativePath;

            // Link names are identifiers, not linguistic text: compare ordinally so the
            // result does not depend on the current culture.
            if (relativePath.StartsWith("..", StringComparison.Ordinal)
                || relativePath.StartsWith("file:", StringComparison.OrdinalIgnoreCase))
            {
                throw new InvalidOperationException($"Create link for invalid path {relativePath}");
            }

            relativePath = relativePath.Replace('\\', '/');

            // Checked after normalisation so a leading backslash is rejected as well.
            if (relativePath.StartsWith("/", StringComparison.Ordinal))
            {
                throw new InvalidOperationException($"Create link for invalid path {relativePath}");
            }

            // eTag stays at its default when no link exists yet for this path.
            links.TryGetValue(relativePath, out var eTag);

            return awsManager.BuildLinkRequest(collection, relativePath, file, eTag);
        },
        new ExecutionDataflowBlockOptions
        {
            CancellationToken = cancellationToken,
            MaxDegreeOfParallelism = Environment.ProcessorCount
        });

    // Non-null requests go on to be created; everything else is drained into a null target
    // so it does not sit in the transform block's output.
    makeLinkBlock.LinkTo(createLinkBlock, new DataflowLinkOptions { PropagateCompletion = true }, link => null != link);
    makeLinkBlock.LinkTo(DataflowBlock.NullTarget<S3Links.ICreateLinkRequest>());

    collectionBlock.LinkTo(makeLinkBlock, new DataflowLinkOptions { PropagateCompletion = true });

    await createLinkBlock.Completion.ConfigureAwait(false);

    Debug.WriteLine($"Link handler for {collection} is done");
}
/// <summary>
/// Wires up the blob upload pipeline: each incoming (fingerprint, path) pair is checked
/// against the already-known objects, turned into an upload request when needed, and uploaded.
/// </summary>
/// <param name="awsManager">Manager used to build upload requests and perform uploads.</param>
/// <param name="uniqueBlobBlock">Source of the blobs to consider.</param>
/// <param name="knowObjects">Known objects: fingerprint key mapped to the stored ETag.</param>
/// <param name="cancellationToken">Applied to both dataflow blocks and to each upload.</param>
/// <returns>
/// A task that completes when the uploader block has completed (and, in DEBUG builds, when
/// the summary line has been written).
/// </returns>
public Task UploadBlobsAsync(IAwsManager awsManager, ISourceBlock<Tuple<FileFingerprint, AnnotatedPath>> uniqueBlobBlock, IReadOnlyDictionary<string, string> knowObjects, CancellationToken cancellationToken)
{
    var blobCount = 0;
    var blobTotalSize = 0L;

    // Decides whether a blob needs uploading; returns null when it can be skipped.
    Func<Tuple<FileFingerprint, AnnotatedPath>, S3Blobs.IUploadBlobRequest> buildRequest = blob =>
    {
        var file = blob.Item1;

        if (knowObjects.TryGetValue(file.Fingerprint.Key(), out var knownETag))
        {
            // We can't check multipart uploads this way since we don't know the size
            // of the individual parts.
            if (knownETag.Contains("-"))
            {
                Debug.WriteLine($"{file.FullFilePath} is a multi-part upload with ETag {knownETag} {file.Fingerprint.Key().Substring(0, 12)}");

                return null;
            }

            var expectedETag = file.Fingerprint.S3ETag();

            if (string.Equals(expectedETag, knownETag, StringComparison.OrdinalIgnoreCase))
            {
                return null;
            }

            // A mismatch is reported and then falls through to building an upload request.
            Console.WriteLine($"ERROR: {file.FullFilePath} tag mismatch {knownETag}, expected {expectedETag} {file.Fingerprint.Key().Substring(0, 12)}");
        }

        var uploadRequest = awsManager.BuildUploadBlobRequest(blob);

        if (null == uploadRequest)
        {
            return null;
        }

        // The builder runs in parallel, so the counters are updated atomically.
        Interlocked.Increment(ref blobCount);
        Interlocked.Add(ref blobTotalSize, uploadRequest.FileFingerprint.Fingerprint.Size);

        return uploadRequest;
    };

    var builderOptions = new ExecutionDataflowBlockOptions
    {
        CancellationToken = cancellationToken,
        MaxDegreeOfParallelism = Environment.ProcessorCount
    };

    var builderBlock = new TransformBlock<Tuple<FileFingerprint, AnnotatedPath>, S3Blobs.IUploadBlobRequest>(buildRequest, builderOptions);

    var uploaderOptions = new ExecutionDataflowBlockOptions
    {
        MaxDegreeOfParallelism = 4,
        CancellationToken = cancellationToken
    };

    var uploader = new ActionBlock<S3Blobs.IUploadBlobRequest>(
        blob => UploadBlobAsync(awsManager, blob, cancellationToken),
        uploaderOptions);

    var propagateCompletion = new DataflowLinkOptions { PropagateCompletion = true };

    // Real requests go to the uploader; skipped (null) ones are drained into a null target.
    builderBlock.LinkTo(uploader, propagateCompletion, r => null != r);
    builderBlock.LinkTo(DataflowBlock.NullTarget<S3Blobs.IUploadBlobRequest>());

    uniqueBlobBlock.LinkTo(builderBlock, new DataflowLinkOptions { PropagateCompletion = true });

    var tasks = new List<Task>();

    tasks.Add(uploader.Completion);

#if DEBUG
    var uploadDoneTask = uploader.Completion
        .ContinueWith(
            _ => Debug.WriteLine($"Done uploading blobs: {blobCount} items {blobTotalSize.BytesToGiB():F2}GiB"),
            cancellationToken);

    tasks.Add(uploadDoneTask);
#endif

    return Task.WhenAll(tasks);
}