/// <summary>
/// Records <paramref name="annotatedPath"/> under the path fingerprint of its file and, when a
/// file fingerprint is already present for that path, forwards the pair to <c>_targetBlock</c>.
/// </summary>
/// <param name="annotatedPath">The annotated path to register; it is keyed by its collection and relative path.</param>
/// <returns>
/// <see cref="Task.CompletedTask"/> when the path fingerprint carries no file fingerprint;
/// otherwise the task returned by <c>_targetBlock.SendAsync</c>.
/// </returns>
/// <exception cref="InvalidOperationException">
/// The same collection/relative path pair is already registered for this file.
/// </exception>
Task SetAnnotatedPathAsync(AnnotatedPath annotatedPath)
{
    var fullName = annotatedPath.FileInfo.FullName;
    var entry = GetPathFingerprint(fullName);
    var pathKey = Tuple.Create(annotatedPath.Collection, annotatedPath.RelativePath);

    FileFingerprint knownFingerprint;

    // The registration and the fingerprint read happen under the same lock on the entry,
    // so both see one consistent state of it.
    lock (entry)
    {
        var registered = entry.AnnotatedPaths;

        if (registered.ContainsKey(pathKey))
        {
            throw new InvalidOperationException("Duplicate collection/relative path for " + fullName);
        }

        registered[pathKey] = annotatedPath;

        knownFingerprint = entry.FileFingerprint;
    }

    // Nothing is forwarded when no file fingerprint is available for this path.
    return null == knownFingerprint
        ? Task.CompletedTask
        : _targetBlock.SendAsync(Tuple.Create(annotatedPath, knownFingerprint));
}
/// <summary>
/// Scans each of <paramref name="paths"/> for files accepted by <paramref name="filePredicate"/>,
/// wraps every file in an <see cref="AnnotatedPath"/>, groups them into batches of up to 2048,
/// shuffles each batch and offers it to <paramref name="filePathTargetBlock"/>.
/// </summary>
/// <param name="paths">The collection roots to scan; one scan task is started per entry.</param>
/// <param name="filePredicate">Filter handed to <c>PathUtil.ScanDirectory</c>.</param>
/// <param name="filePathTargetBlock">
/// Receives the shuffled batches. Completion is propagated to it once the internal blocks finish.
/// </param>
/// <param name="cancellationToken">Stops the scans and cancels the internal dataflow blocks.</param>
/// <returns>
/// A task that completes when every directory scan has run to its end. It reflects a fault or
/// cancellation raised inside a scan. It does not wait for <paramref name="filePathTargetBlock"/>
/// to consume the batches.
/// </returns>
public static Task GenerateAnnotatedPathsAsync(IEnumerable<CollectionPath> paths, Func<FileInfo, bool> filePredicate, ITargetBlock<AnnotatedPath[]> filePathTargetBlock, CancellationToken cancellationToken)
{
    var shuffleBlock = new TransformBlock<AnnotatedPath[], AnnotatedPath[]>(
        filenames =>
        {
            // Sequential names tend to fall into the same AWS S3 partition, so we
            // shuffle things around.
            RandomUtil.Shuffle(filenames);

            return filenames;
        },
        new ExecutionDataflowBlockOptions
        {
            CancellationToken = cancellationToken,
            MaxDegreeOfParallelism = Environment.ProcessorCount
        });

    shuffleBlock.LinkTo(filePathTargetBlock, new DataflowLinkOptions { PropagateCompletion = true });

    var batcher = new BatchBlock<AnnotatedPath>(2048, new GroupingDataflowBlockOptions { CancellationToken = cancellationToken });

    batcher.LinkTo(shuffleBlock, new DataflowLinkOptions { PropagateCompletion = true });

    var scanTasks = paths
        .Select<CollectionPath, Task>(path => Task.Factory.StartNew(
            async () =>
            {
                foreach (var file in PathUtil.ScanDirectory(path.Path, filePredicate))
                {
                    if (cancellationToken.IsCancellationRequested)
                    {
                        break;
                    }

                    var relativePath = PathUtil.MakeRelativePath(path.Path, file.FullName);

                    var annotatedPath = new AnnotatedPath(file, path.Collection ?? path.Path, relativePath);

                    await batcher.SendAsync(annotatedPath, cancellationToken).ConfigureAwait(false);
                }
            },
            cancellationToken,
            TaskCreationOptions.DenyChildAttach | TaskCreationOptions.LongRunning | TaskCreationOptions.RunContinuationsAsynchronously,
            TaskScheduler.Default)
            // StartNew with an async lambda yields a Task<Task> whose outer task finishes at the
            // first incomplete await. Unwrap so the task we track ends only when the whole scan
            // has ended; otherwise the batcher below would be completed while scans are still
            // sending, and the remaining files would be dropped.
            .Unwrap());

    var task = Task.WhenAll(scanTasks);

    var localFilePathTargetBlock = (ITargetBlock<AnnotatedPath>)batcher;

    // Signal end-of-input to the batcher once all scans are done so the final partial batch
    // is released and completion flows down the linked blocks.
    var completeTask = task.ContinueWith(_ => localFilePathTargetBlock.Complete(), cancellationToken);

    TaskCollector.Default.Add(completeTask, "GenerateAnnotatedPathsAsync");

    return task;
}