Exemple #1
0
        /// <summary>
        /// Assembles the batch pipeline: a reading block followed, in this order, by validation,
        /// field computation, the extra processing blocks, writing, progress reporting and a
        /// final block that hands every item of a batch back to the data pool.
        /// </summary>
        /// <param name="peopleJsonFilePath">Path forwarded to the reading block factory.</param>
        /// <param name="targetFilePath">Path forwarded to the writing block factory.</param>
        /// <param name="progress">Progress sink forwarded to the progress-reporting block factory.</param>
        /// <param name="cancellationSource">
        /// Source whose token is handed to the blocks created directly in this method; the source
        /// itself is passed to the extra-block creation and to the pipeline factory.
        /// </param>
        /// <returns>The block produced by the straight pipeline factory.</returns>
        public StartableBlock <DataBatch> Create(string peopleJsonFilePath,
                                                 string targetFilePath,
                                                 IProgress <PipelineProgress> progress,
                                                 CancellationTokenSource cancellationSource)
        {
            var dataPool = new DataPool();
            var token    = cancellationSource.Token;

            // Source stage; it shares the pool that the last stage returns items to.
            var readBlock = _readingBlockFactory.Create(peopleJsonFilePath, dataPool, token);

            // Both item-level stages use the same setting: the configured maximum when
            // parallel processing is enabled, otherwise a single worker.
            var parallelism = Settings.ProcessInParallel ? Settings.MaxDegreeOfParallelism : 1;

            // Only items currently flagged as valid are validated.
            var validateBlock = ProcessingBlock<DataBatch>.Create(
                "Validate",
                DataBatch.IdGetter,
                batch => batch.Data.Where(entry => entry.IsValid).ForEach(_personValidator.Validate),
                token,
                parallelism);

            // The validity filter is applied again here, after the validation stage has run.
            var computeFieldsBlock = ProcessingBlock<DataBatch>.Create(
                "ComputeFields",
                DataBatch.IdGetter,
                batch => batch.Data.Where(entry => entry.IsValid).ForEach(_personFieldsComputer.Compute),
                token,
                parallelism);

            var extraProcessingBlocks = CreateExtraProcessingBlocks(cancellationSource);

            var writeBlock = _writingBlockFactory.Create(targetFilePath, token);

            var progressBlock = _progressReportingBlockFactory.Create(
                DataBatch.IdGetter,
                progress,
                readBlock.EstimatedOutputCount,
                1,
                token);

            // Returns every item of the batch (valid or not) to the pool.
            var disposeBlock = ProcessingBlock<DataBatch>.Create(
                "DisposeData",
                DataBatch.IdGetter,
                batch => batch.Data.ForEach(dataPool.Return),
                token);

            // Final block order: item-level stages, then the extra stages, then the terminal stages.
            var leadingBlocks  = new[] { validateBlock, computeFieldsBlock };
            var trailingBlocks = new[] { writeBlock, progressBlock, disposeBlock };
            var pipelineBlocks = leadingBlocks.Concat(extraProcessingBlocks)
                                              .Concat(trailingBlocks)
                                              .ToArray();

            return _straightPipelineFactory.Create(readBlock, pipelineBlocks, cancellationSource);
        }
Exemple #2
0
        /// <summary>
        /// Assembles the per-item railroad pipeline: a reading block, a main track of
        /// validation, field computation, writing and a throw-test (or empty) block, plus a
        /// separate error-handling block and a progress-reporting block.
        /// </summary>
        /// <param name="peopleJsonFilePath">Path forwarded to the reading block factory.</param>
        /// <param name="targetFilePath">Path forwarded to the writing block factory for the main track.</param>
        /// <param name="errorsFilePath">Path forwarded to the writing block factory for the error-handling block.</param>
        /// <param name="progress">Progress sink forwarded to the progress-reporting block factory.</param>
        /// <param name="cancellationSource">
        /// Source whose token is handed to every block created here; the source itself is
        /// passed to the pipeline factory.
        /// </param>
        /// <returns>The block produced by the railroad pipeline factory.</returns>
        public StartableBlock <Data> Create(string peopleJsonFilePath,
                                            string targetFilePath,
                                            string errorsFilePath,
                                            IProgress <PipelineProgress> progress,
                                            CancellationTokenSource cancellationSource)
        {
            var dataPool = new DataPool();
            var token    = cancellationSource.Token;

            // Create blocks
            // TODO: Progress reporting approach 1: before anything
            var readBlock = _readingBlockFactory.Create(peopleJsonFilePath, dataPool, token);

            // Validation and field computation run with the same configured parallelism.
            var parallelism = Settings.DegreeOfParallelism;

            var validateTransform = DataflowFacade.TransformBlock(
                "Validate",
                Data.IdGetter,
                item => _personValidator.Validate(item),
                token,
                parallelism);
            var validateBlock = new ProcessingBlock<Data>(validateTransform);

            var computeFieldsTransform = DataflowFacade.TransformBlock(
                "ComputeFields",
                Data.IdGetter,
                item => _personFieldsComputer.Compute(item),
                token,
                parallelism);
            var computeFieldsBlock = new ProcessingBlock<Data>(computeFieldsTransform);

            var writeBlock = _writingBlockFactory.Create(targetFilePath, token);

            // With the throw-test setting on, a throwing block is used; otherwise an empty block.
            var throwBlock = Settings.ThrowTest
                ? _throwingBlockFactory.Create(Data.IdGetter, token)
                : _emptyBlockFactory.Create<Data>(token);

            // The error-handling block is a second writing block, targeting the errors file.
            var handleErrorBlock = _writingBlockFactory.Create(errorsFilePath, token);

            var progressBlock = _progressReportingBlockFactory.Create(
                "ReportProgress",
                Data.IdGetter,
                progress,
                readBlock.EstimatedOutputCount,
                Settings.ProgressBatchSize,
                token);

            var mainTrackBlocks = new[] { validateBlock, computeFieldsBlock, writeBlock, throwBlock };

            // NOTE(review): the IsValid predicate presumably decides which items stay on the
            // main track versus go to the error block - confirm against the pipeline factory.
            return _railroadPipelineFactory.Create(
                readBlock,
                mainTrackBlocks,
                handleErrorBlock,
                progressBlock,
                item => item.IsValid,
                cancellationSource);
        }
Exemple #3
0
        /// <summary>
        /// Assembles the batch pipeline in which all per-item work (validation, field
        /// computation and the configured number of extra time-consuming passes) is done by a
        /// single "Process" block, followed by writing, progress reporting and a block that
        /// hands every item of a batch back to the data pool.
        /// </summary>
        /// <param name="peopleJsonFilePath">Path forwarded to the reading block factory.</param>
        /// <param name="targetFilePath">Path forwarded to the writing block factory.</param>
        /// <param name="progress">Progress sink forwarded to the progress-reporting block factory.</param>
        /// <param name="cancellationSource">
        /// Source whose token is handed to every block created here; the source itself is
        /// passed to the pipeline factory.
        /// </param>
        /// <returns>The block produced by the straight pipeline factory.</returns>
        public StartableBlock <DataBatch> Create(string peopleJsonFilePath,
                                                 string targetFilePath,
                                                 IProgress <PipelineProgress> progress,
                                                 CancellationTokenSource cancellationSource)
        {
            var dataPool = new DataPool();
            var token    = cancellationSource.Token;

            // Source stage; it shares the pool that the last stage returns items to.
            var readBlock = _readingBlockFactory.Create(peopleJsonFilePath, dataPool, token);

            var processTransform = DataflowFacade.TransformBlock(
                "Process",
                DataBatch.IdGetter,
                batch =>
                {
                    // Two separate passes: the validity filter is evaluated again for the
                    // compute pass, after the validation pass has finished.
                    batch.Data.Where(entry => entry.IsValid).ForEach(_personValidator.Validate);
                    batch.Data.Where(entry => entry.IsValid).ForEach(_personFieldsComputer.Compute);

                    // One time-consuming operation per item (valid or not) for each extra pass.
                    for (var pass = 0; pass < Settings.ExtraProcessingBlocksCount; pass++)
                    {
                        batch.Data.ForEach(_ => ComputationsHelper.PerformTimeConsumingOperation());
                    }
                },
                token,
                Settings.DegreeOfParallelism);
            var processBlock = new ProcessingBlock<DataBatch>(processTransform);

            var writeBlock = _writingBlockFactory.Create(targetFilePath, token);

            var progressBlock = _progressReportingBlockFactory.Create(
                "ReportProgress",
                DataBatch.IdGetter,
                progress,
                readBlock.EstimatedOutputCount,
                1,
                token);

            // Returns every item of the batch to the pool.
            var disposeTransform = DataflowFacade.TransformBlock(
                "DisposeData",
                DataBatch.IdGetter,
                batch => batch.Data.ForEach(dataPool.Return),
                token);
            var disposeBlock = new ProcessingBlock<DataBatch>(disposeTransform);

            var pipelineBlocks = new[] { processBlock, writeBlock, progressBlock, disposeBlock };

            return _straightPipelineFactory.Create(readBlock, pipelineBlocks, cancellationSource);
        }