/// <summary>
/// Records <paramref name="annotatedPath"/> against the path fingerprint of its file and,
/// if that path fingerprint already carries a file fingerprint, sends the pair to
/// <c>_targetBlock</c>.
/// </summary>
/// <param name="annotatedPath">The annotated path to register.</param>
/// <returns>
/// <see cref="Task.CompletedTask"/> when the path fingerprint has no file fingerprint;
/// otherwise the task returned by <c>_targetBlock.SendAsync</c>.
/// </returns>
/// <exception cref="InvalidOperationException">
/// Thrown synchronously when the same (collection, relative path) key is already
/// registered for this path fingerprint.
/// </exception>
Task SetAnnotatedPathAsync(AnnotatedPath annotatedPath)
        {
            var entry = GetPathFingerprint(annotatedPath.FileInfo.FullName);
            var lookupKey = Tuple.Create(annotatedPath.Collection, annotatedPath.RelativePath);

            FileFingerprint knownFingerprint;

            // The duplicate check, the insert and the fingerprint read all happen
            // under the same lock on the path fingerprint entry.
            lock (entry)
            {
                var alreadyRegistered = entry.AnnotatedPaths.ContainsKey(lookupKey);

                if (alreadyRegistered)
                {
                    throw new InvalidOperationException("Duplicate collection/relative path for " + annotatedPath.FileInfo.FullName);
                }

                entry.AnnotatedPaths[lookupKey] = annotatedPath;
                knownFingerprint = entry.FileFingerprint;
            }

            // The send happens outside the lock.
            if (null != knownFingerprint)
            {
                return _targetBlock.SendAsync(Tuple.Create(annotatedPath, knownFingerprint));
            }

            return Task.CompletedTask;
        }
Esempio n. 2
0
        /// <summary>
        /// Scans each entry of <paramref name="paths"/> on its own task, wraps every file that
        /// the scan yields in an <see cref="AnnotatedPath"/>, and feeds them through a
        /// batch (2048 items) -> shuffle -> <paramref name="filePathTargetBlock"/> pipeline.
        /// </summary>
        /// <param name="paths">The collection roots to scan.</param>
        /// <param name="filePredicate">Filter passed through to <c>PathUtil.ScanDirectory</c>.</param>
        /// <param name="filePathTargetBlock">
        /// Receives the shuffled batches. Completion is propagated to it through the links.
        /// </param>
        /// <param name="cancellationToken">
        /// Stops the scan loops and is handed to the dataflow blocks and tasks created here.
        /// </param>
        /// <returns>
        /// A task that completes when every scan has actually finished. It faults or is
        /// canceled if any scan faults or is canceled. The batch block is completed by a
        /// separate continuation that is registered with <c>TaskCollector.Default</c>.
        /// </returns>
        public static Task GenerateAnnotatedPathsAsync(IEnumerable<CollectionPath> paths,
                                                       Func<FileInfo, bool> filePredicate,
                                                       ITargetBlock<AnnotatedPath[]> filePathTargetBlock,
                                                       CancellationToken cancellationToken)
        {
            var shuffleBlock = new TransformBlock<AnnotatedPath[], AnnotatedPath[]>(
                filenames =>
                {
                    // Sequential names tend to fall into the same AWS S3 partition, so we
                    // shuffle things around.
                    RandomUtil.Shuffle(filenames);

                    return filenames;
                },
                new ExecutionDataflowBlockOptions
                {
                    CancellationToken = cancellationToken,
                    MaxDegreeOfParallelism = Environment.ProcessorCount
                });

            shuffleBlock.LinkTo(filePathTargetBlock, new DataflowLinkOptions { PropagateCompletion = true });

            var batcher = new BatchBlock<AnnotatedPath>(2048, new GroupingDataflowBlockOptions
            {
                CancellationToken = cancellationToken
            });

            batcher.LinkTo(shuffleBlock, new DataflowLinkOptions { PropagateCompletion = true });

            // StartNew with an async lambda produces a Task<Task>: the outer task finishes as
            // soon as the lambda hits its first yielding await. Unwrap() gives a task that
            // tracks the real end of the scan and carries its exceptions, so WhenAll below
            // neither completes early nor hides scan failures.
            var scanTasks = paths
                .Select(path => Task.Factory.StartNew(
                        async () =>
                        {
                            foreach (var file in PathUtil.ScanDirectory(path.Path, filePredicate))
                            {
                                if (cancellationToken.IsCancellationRequested)
                                {
                                    break;
                                }

                                var relativePath = PathUtil.MakeRelativePath(path.Path, file.FullName);

                                var annotatedPath = new AnnotatedPath(file, path.Collection ?? path.Path, relativePath);

                                var accepted = await batcher.SendAsync(annotatedPath, cancellationToken).ConfigureAwait(false);

                                if (!accepted)
                                {
                                    // The block has declined the item, so it will not take
                                    // any further ones either; stop scanning.
                                    break;
                                }
                            }
                        },
                        cancellationToken,
                        TaskCreationOptions.DenyChildAttach | TaskCreationOptions.LongRunning | TaskCreationOptions.RunContinuationsAsynchronously,
                        TaskScheduler.Default)
                    .Unwrap())
                .ToArray();

            var task = Task.WhenAll(scanTasks);

            // Complete the head of the pipeline once all scans are done (successfully or not).
            // The scheduler is given explicitly so the continuation does not depend on
            // whatever TaskScheduler.Current is at the call site.
            var completeTask = task.ContinueWith(
                _ => batcher.Complete(),
                cancellationToken,
                TaskContinuationOptions.None,
                TaskScheduler.Default);

            TaskCollector.Default.Add(completeTask, "GenerateAnnotatedPathsAsync");

            return task;
        }