/// <summary>
/// Creates links for the blobs arriving on <paramref name="linkBlobs"/> by delegating to
/// <c>_linkManager.CreateLinksAsync</c>, forwarding <c>_s3Settings.ActuallyWrite</c>.
/// </summary>
/// <remarks>
/// Any exception raised while the links are processed is caught here and only written to the
/// debug output; it is not rethrown to the caller.
/// </remarks>
/// <param name="awsManager">Manager instance handed through to the link manager.</param>
/// <param name="linkBlobs">Source block supplying the path/fingerprint pairs to link.</param>
/// <param name="cancellationToken">Token handed through to the link manager.</param>
/// <returns>A task that completes when link processing has finished or failed.</returns>
public async Task UpdateLinksAsync(AwsManager awsManager, ISourceBlock<Tuple<AnnotatedPath, IFileFingerprint>> linkBlobs, CancellationToken cancellationToken)
{
    try
    {
        var pendingLinks = _linkManager.CreateLinksAsync(
            awsManager,
            linkBlobs,
            _s3Settings.ActuallyWrite,
            cancellationToken);

        await pendingLinks.ConfigureAwait(false);

        Debug.WriteLine("Done processing links");
    }
    catch (Exception failure)
    {
        // Best effort: a failure is reported to the debug output and otherwise ignored.
        Debug.WriteLine("Processing links failed: " + failure.Message);
    }
}
/// <summary>
/// Builds and runs the dataflow pipeline that creates the links of one collection.
/// </summary>
/// <remarks>
/// The existing links of <paramref name="collection"/> are fetched first through
/// <c>awsManager.GetLinksAsync</c>. Each incoming tuple is validated, turned into a link request
/// via <c>awsManager.BuildLinkRequest</c> (passing the ETag of the existing link when one is
/// known, otherwise <c>null</c>) and then handed to <c>CreateLinkAsync</c>.
/// The returned task finishes when the create-link block completes; completion is propagated
/// from <paramref name="collectionBlock"/>, so its owner has to complete it.
/// </remarks>
/// <param name="awsManager">Manager used to list existing links and build link requests.</param>
/// <param name="collection">Name of the collection this pipeline is responsible for.</param>
/// <param name="collectionBlock">Source of the path/fingerprint pairs for the collection.</param>
/// <param name="actuallyWrite">Forwarded unchanged to <c>CreateLinkAsync</c>.</param>
/// <param name="cancellationToken">Token applied to both dataflow blocks and the link listing.</param>
/// <exception cref="InvalidOperationException">
/// Thrown inside the transform block when a tuple belongs to a different collection or its
/// relative path starts with "..", "file:" or (after normalization) "/".
/// </exception>
async Task CreateLinksBlockAsync(AwsManager awsManager, string collection, ISourceBlock<Tuple<AnnotatedPath, IFileFingerprint>> collectionBlock, bool actuallyWrite, CancellationToken cancellationToken)
{
    var links = await awsManager.GetLinksAsync(collection, cancellationToken).ConfigureAwait(false);

    Debug.WriteLine($"Link handler for {collection} found {links.Count} existing links");

    var createLinkBlock = new ActionBlock<S3Links.ICreateLinkRequest>(
        link => CreateLinkAsync(awsManager, link, actuallyWrite, cancellationToken),
        new ExecutionDataflowBlockOptions
        {
            MaxDegreeOfParallelism = 512,
            CancellationToken = cancellationToken
        });

    var makeLinkBlock = new TransformBlock<Tuple<AnnotatedPath, IFileFingerprint>, S3Links.ICreateLinkRequest>(
        tuple =>
        {
            var path = tuple.Item1;
            var file = tuple.Item2;

            if (collection != path.Collection)
                throw new InvalidOperationException($"Create link for {path.Collection} on {collection}");

            var relativePath = path.RelativePath;

            // Path validation is a non-linguistic comparison, so it is done ordinally
            // (the culture-sensitive default overload can give culture-dependent answers).
            if (relativePath.StartsWith("..", StringComparison.Ordinal))
                throw new InvalidOperationException($"Create link for invalid path {relativePath}");

            if (relativePath.StartsWith("file:", StringComparison.OrdinalIgnoreCase))
                throw new InvalidOperationException($"Create link for invalid path {relativePath}");

            // Normalize separators before the rooted-path check so "\foo" is rejected as well.
            relativePath = relativePath.Replace('\\', '/');

            if (relativePath.StartsWith("/", StringComparison.Ordinal))
                throw new InvalidOperationException($"Create link for invalid path {relativePath}");

            // eTag stays null when no link exists yet for this path.
            string eTag;
            links.TryGetValue(relativePath, out eTag);

            return awsManager.BuildLinkRequest(collection, relativePath, file, eTag);
        },
        new ExecutionDataflowBlockOptions
        {
            CancellationToken = cancellationToken,
            MaxDegreeOfParallelism = Environment.ProcessorCount
        });

    // Non-null requests go to the link creator; null requests are drained into a null target
    // so that they do not sit in the transform block's output.
    makeLinkBlock.LinkTo(createLinkBlock, new DataflowLinkOptions { PropagateCompletion = true }, link => null != link);
    makeLinkBlock.LinkTo(DataflowBlock.NullTarget<S3Links.ICreateLinkRequest>());

    collectionBlock.LinkTo(makeLinkBlock, new DataflowLinkOptions { PropagateCompletion = true });

    await createLinkBlock.Completion.ConfigureAwait(false);

    Debug.WriteLine($"Link handler for {collection} is done");
}
/// <summary>
/// Routes every blob from <paramref name="blobSourceBlock"/> to a per-collection pipeline
/// (started on demand with <c>CreateLinksBlockAsync</c>) and waits for all of them to finish.
/// </summary>
/// <remarks>
/// Blobs whose collection name is null or empty are skipped. Once the routing block has
/// finished, every per-collection buffer is completed and the pipeline tasks are awaited.
/// </remarks>
/// <param name="awsManager">Manager handed through to each per-collection pipeline.</param>
/// <param name="blobSourceBlock">Source of the path/fingerprint pairs to link.</param>
/// <param name="actuallyWrite">Forwarded unchanged to each per-collection pipeline.</param>
/// <param name="cancellationToken">Token used when sending blobs and by the pipelines.</param>
/// <returns>A task that completes when all per-collection pipelines have completed.</returns>
public async Task CreateLinksAsync(AwsManager awsManager, ISourceBlock<Tuple<AnnotatedPath, IFileFingerprint>> blobSourceBlock, bool actuallyWrite, CancellationToken cancellationToken)
{
    var collectionBlocks = new Dictionary<string, ITargetBlock<Tuple<AnnotatedPath, IFileFingerprint>>>();
    var tasks = new List<Task>();

    // The block is created with default execution options (degree of parallelism 1), so the
    // dictionary and the task list are only touched by one message handler at a time.
    var routeBlock = new ActionBlock<Tuple<AnnotatedPath, IFileFingerprint>>(async blob =>
    {
        var collection = blob.Item1.Collection;

        if (string.IsNullOrEmpty(collection))
            return;

        ITargetBlock<Tuple<AnnotatedPath, IFileFingerprint>> collectionBlock;

        if (!collectionBlocks.TryGetValue(collection, out collectionBlock))
        {
            // First blob of this collection: create its buffer and start its pipeline.
            var bufferBlock = new BufferBlock<Tuple<AnnotatedPath, IFileFingerprint>>();

            collectionBlock = bufferBlock;
            collectionBlocks[collection] = collectionBlock;

            var task = CreateLinksBlockAsync(awsManager, collection, bufferBlock, actuallyWrite, cancellationToken);

            tasks.Add(task);
        }

        await collectionBlock.SendAsync(blob, cancellationToken).ConfigureAwait(false);
    });

    blobSourceBlock.LinkTo(routeBlock, new DataflowLinkOptions { PropagateCompletion = true });

    try
    {
        await routeBlock.Completion.ConfigureAwait(false);

        Debug.WriteLine("S3LinkCreateor.CreateLinkAsync() routeBlock is done");
    }
    finally
    {
        // Complete the buffers even when routing failed; otherwise the pipelines that were
        // already started would wait for more input forever and never finish.
        foreach (var block in collectionBlocks.Values)
            block.Complete();
    }

    await Task.WhenAll(tasks).ConfigureAwait(false);

    Debug.WriteLine("S3LinkCreateor.CreateLinkAsync() all link blocks are done");
}
/// <summary>
/// Builds the dataflow pipeline that uploads the blobs arriving on
/// <paramref name="uniqueBlobBlock"/> which are not already present in
/// <paramref name="knowObjects"/>.
/// </summary>
/// <remarks>
/// A blob is skipped when its fingerprint key is known and either the stored ETag contains a
/// '-' (treated as a multi-part upload that cannot be verified here) or the stored ETag equals
/// the expected one. A known blob whose ETag differs is reported on the console and uploaded
/// again. Upload requests are handed to <c>UploadBlobAsync</c> with a parallelism of 4.
/// </remarks>
/// <param name="awsManager">Manager used to build and execute the upload requests.</param>
/// <param name="uniqueBlobBlock">Source of the fingerprint/path pairs to consider.</param>
/// <param name="knowObjects">Map from fingerprint key to the ETag of the stored object.</param>
/// <param name="cancellationToken">Token applied to both dataflow blocks and the uploads.</param>
/// <returns>A task that completes when the uploader block has completed.</returns>
public Task UploadBlobsAsync(AwsManager awsManager, ISourceBlock<Tuple<IFileFingerprint, AnnotatedPath>> uniqueBlobBlock, IReadOnlyDictionary<string, string> knowObjects, CancellationToken cancellationToken)
{
    // Statistics; updated with Interlocked because the builder block runs in parallel.
    var blobCount = 0;
    var blobTotalSize = 0L;

    var builderBlock = new TransformBlock<Tuple<IFileFingerprint, AnnotatedPath>, S3Blobs.IUploadBlobRequest>(
        tuple =>
        {
            string etag;
            var exists = knowObjects.TryGetValue(tuple.Item1.Fingerprint.Key(), out etag);

            if (exists)
            {
                // We can't check multipart uploads this way since we don't know the size
                // of the individual parts.
                if (etag.Contains("-"))
                {
                    Debug.WriteLine($"{tuple.Item1.FullFilePath} is a multi-part upload with ETag {etag} {tuple.Item1.Fingerprint.Key().Substring(0, 12)}");

                    return null;
                }

                var expectedETag = tuple.Item1.Fingerprint.S3ETag();

                // ETags are identifiers rather than linguistic text, so the comparison is
                // ordinal (case-insensitive) instead of culture-based.
                if (string.Equals(expectedETag, etag, StringComparison.OrdinalIgnoreCase))
                    return null;

                // Mismatch: report it and fall through so the blob is uploaded again.
                Console.WriteLine($"ERROR: {tuple.Item1.FullFilePath} tag mismatch {etag}, expected {expectedETag} {tuple.Item1.Fingerprint.Key().Substring(0, 12)}");
            }

            var request = awsManager.BuildUploadBlobRequest(tuple);

            if (null == request)
                return null;

            Interlocked.Increment(ref blobCount);
            Interlocked.Add(ref blobTotalSize, request.FileFingerprint.Fingerprint.Size);

            return request;
        },
        new ExecutionDataflowBlockOptions
        {
            CancellationToken = cancellationToken,
            MaxDegreeOfParallelism = Environment.ProcessorCount
        });

    var uploader = new ActionBlock<S3Blobs.IUploadBlobRequest>(
        blob => UploadBlobAsync(awsManager, blob, cancellationToken),
        new ExecutionDataflowBlockOptions
        {
            MaxDegreeOfParallelism = 4,
            CancellationToken = cancellationToken
        });

    // Real requests go to the uploader; null results (nothing to upload) are drained into a
    // null target so that they do not sit in the builder block's output.
    builderBlock.LinkTo(uploader, new DataflowLinkOptions { PropagateCompletion = true }, r => null != r);
    builderBlock.LinkTo(DataflowBlock.NullTarget<S3Blobs.IUploadBlobRequest>());

    uniqueBlobBlock.LinkTo(builderBlock, new DataflowLinkOptions { PropagateCompletion = true });

    var tasks = new List<Task>();

#if DEBUG
    // Debug builds additionally log the upload statistics once the uploader has finished.
    var uploadDoneTask = uploader.Completion
        .ContinueWith(
            _ => Debug.WriteLine($"Done uploading blobs: {blobCount} items {SizeConversion.BytesToGiB(blobTotalSize):F2}GiB"),
            cancellationToken);

    tasks.Add(uploadDoneTask);
#endif

    tasks.Add(uploader.Completion);

    return Task.WhenAll(tasks);
}
/// <summary>
/// Announces a single blob upload on the console and, when <c>_s3Settings.ActuallyWrite</c>
/// is set, performs it through <c>awsManager.UploadBlobAsync</c>.
/// </summary>
/// <remarks>
/// A null request is ignored. An exception raised by the upload is caught and reported on the
/// console; it is not rethrown.
/// </remarks>
/// <param name="awsManager">Manager that executes the upload.</param>
/// <param name="uploadBlobRequest">The upload to perform; may be null.</param>
/// <param name="cancellationToken">Token handed through to the upload call.</param>
async Task UploadBlobAsync(AwsManager awsManager, S3Blobs.IUploadBlobRequest uploadBlobRequest, CancellationToken cancellationToken)
{
    if (uploadBlobRequest == null)
    {
        return;
    }

    // Only the first 12 characters of the fingerprint key are shown.
    Console.WriteLine(
        "Upload {0} as {1}",
        uploadBlobRequest.FileFingerprint.FullFilePath,
        uploadBlobRequest.FileFingerprint.Fingerprint.Key().Substring(0, 12));

    if (_s3Settings.ActuallyWrite)
    {
        try
        {
            var pendingUpload = awsManager.UploadBlobAsync(uploadBlobRequest, cancellationToken);

            await pendingUpload.ConfigureAwait(false);
        }
        catch (Exception failure)
        {
            Console.WriteLine(
                "Upload of {0} failed: {1}",
                uploadBlobRequest.FileFingerprint.FullFilePath,
                failure.Message);
        }
    }
}
/// <summary>
/// Announces a single link on the console and, when <paramref name="actuallyWrite"/> is set,
/// creates it through <c>awsManager.CreateLinkAsync</c>.
/// </summary>
/// <remarks>
/// Nothing happens when cancellation has already been requested. An
/// <see cref="OperationCanceledException"/> from the create call is swallowed silently; any
/// other exception is reported on the console and not rethrown.
/// </remarks>
/// <param name="awsManager">Manager that executes the link creation.</param>
/// <param name="createLinkRequest">The link to create.</param>
/// <param name="actuallyWrite">When false, the link is only announced, not created.</param>
/// <param name="cancellationToken">Token checked up front and handed to the create call.</param>
async Task CreateLinkAsync(AwsManager awsManager, S3Links.ICreateLinkRequest createLinkRequest, bool actuallyWrite, CancellationToken cancellationToken)
{
    if (cancellationToken.IsCancellationRequested)
    {
        return;
    }

    var linkPath = createLinkRequest.RelativePath;
    var blobKey = createLinkRequest.FileFingerprint.Fingerprint.Key();

    // Only the first 12 characters of the fingerprint key are shown.
    Console.WriteLine(
        "Link {0} \"{1}\" -> {2} ({3})",
        createLinkRequest.Collection,
        linkPath,
        blobKey.Substring(0, 12),
        createLinkRequest.FileFingerprint.WasCached ? "cached" : "new");

    if (actuallyWrite)
    {
        try
        {
            var pendingLink = awsManager.CreateLinkAsync(createLinkRequest, cancellationToken);

            await pendingLink.ConfigureAwait(false);
        }
        catch (OperationCanceledException)
        {
            // Cancellation is not reported as a failure.
        }
        catch (Exception failure)
        {
            Console.WriteLine(
                "Link {0} {1} -> {2} failed: {3}",
                createLinkRequest.Collection,
                linkPath,
                blobKey.Substring(0, 12),
                failure.Message);
        }
    }
}