/// <summary>
/// Assembles a straight pipeline over <see cref="DataBatch"/> items: read, validate,
/// compute fields, extra processing, write, report progress and finally hand the
/// batch items back to the data pool.
/// </summary>
/// <param name="peopleJsonFilePath">Path passed to the reading block factory as the input file.</param>
/// <param name="targetFilePath">Path passed to the writing block factory as the output file.</param>
/// <param name="progress">Progress sink handed to the progress-reporting block.</param>
/// <param name="cancellationSource">
/// Source whose token is given to every block; the source itself is also passed to the
/// extra-processing-block creation and to the pipeline factory.
/// </param>
/// <returns>The block produced by the straight pipeline factory.</returns>
public StartableBlock<DataBatch> Create(string peopleJsonFilePath,
                                        string targetFilePath,
                                        IProgress<PipelineProgress> progress,
                                        CancellationTokenSource cancellationSource)
{
    var dataPool = new DataPool();
    var token = cancellationSource.Token;

    // Source block: reads the input file, drawing its items from the pool.
    var readBlock = _readingBlockFactory.Create(peopleJsonFilePath, dataPool, token);

    // Validation and field computation share the same parallelism setting.
    var parallelism = Settings.ProcessInParallel ? Settings.MaxDegreeOfParallelism : 1;

    // Only items that are (still) flagged valid are validated / computed.
    var validateBlock = ProcessingBlock<DataBatch>.Create(
        "Validate",
        DataBatch.IdGetter,
        batch => batch.Data.Where(person => person.IsValid).ForEach(_personValidator.Validate),
        token,
        parallelism);

    var computeFieldsBlock = ProcessingBlock<DataBatch>.Create(
        "ComputeFields",
        DataBatch.IdGetter,
        batch => batch.Data.Where(person => person.IsValid).ForEach(_personFieldsComputer.Compute),
        token,
        parallelism);

    var extraProcessingBlocks = CreateExtraProcessingBlocks(cancellationSource);

    var writeBlock = _writingBlockFactory.Create(targetFilePath, token);

    var progressBlock = _progressReportingBlockFactory.Create(
        DataBatch.IdGetter,
        progress,
        readBlock.EstimatedOutputCount,
        1,
        token);

    // Last stage: every item of the batch is returned to the pool.
    var disposeBlock = ProcessingBlock<DataBatch>.Create(
        "DisposeData",
        DataBatch.IdGetter,
        batch => batch.Data.ForEach(dataPool.Return),
        token);

    // Stage order: validate, compute, extras, write, progress, dispose.
    var leadingBlocks = new[] { validateBlock, computeFieldsBlock };
    var trailingBlocks = new[] { writeBlock, progressBlock, disposeBlock };
    var processingBlocks = leadingBlocks
        .Concat(extraProcessingBlocks)
        .Concat(trailingBlocks)
        .ToArray();

    return _straightPipelineFactory.Create(readBlock, processingBlocks, cancellationSource);
}
/// <summary>
/// Assembles a railroad pipeline over <see cref="Data"/> items: a main track
/// (validate, compute fields, write, optional throwing block) plus an error-handling
/// block and a progress-reporting block.
/// </summary>
/// <param name="peopleJsonFilePath">Path passed to the reading block factory as the input file.</param>
/// <param name="targetFilePath">Path passed to the writing block factory for the main output.</param>
/// <param name="errorsFilePath">Path passed to the writing block factory for the error-handling block.</param>
/// <param name="progress">Progress sink handed to the progress-reporting block.</param>
/// <param name="cancellationSource">
/// Source whose token is given to every block; the source itself is also passed to the
/// pipeline factory.
/// </param>
/// <returns>The block produced by the railroad pipeline factory.</returns>
public StartableBlock<Data> Create(string peopleJsonFilePath,
                                   string targetFilePath,
                                   string errorsFilePath,
                                   IProgress<PipelineProgress> progress,
                                   CancellationTokenSource cancellationSource)
{
    var dataPool = new DataPool();
    var token = cancellationSource.Token;

    // TODO: Progress reporting approach 1: before anything
    var readBlock = _readingBlockFactory.Create(peopleJsonFilePath, dataPool, token);

    var validateBlock = new ProcessingBlock<Data>(
        DataflowFacade.TransformBlock(
            "Validate",
            Data.IdGetter,
            data => _personValidator.Validate(data),
            token,
            Settings.DegreeOfParallelism));

    var computeFieldsBlock = new ProcessingBlock<Data>(
        DataflowFacade.TransformBlock(
            "ComputeFields",
            Data.IdGetter,
            data => _personFieldsComputer.Compute(data),
            token,
            Settings.DegreeOfParallelism));

    var writeBlock = _writingBlockFactory.Create(targetFilePath, token);

    // With Settings.ThrowTest on, a throwing block is appended to the main track;
    // otherwise an empty block takes its place.
    var throwBlock = Settings.ThrowTest
        ? _throwingBlockFactory.Create(Data.IdGetter, token)
        : _emptyBlockFactory.Create<Data>(token);

    // The error track reuses the writing block factory, targeting the errors file.
    var handleErrorBlock = _writingBlockFactory.Create(errorsFilePath, token);

    var progressBlock = _progressReportingBlockFactory.Create(
        "ReportProgress",
        Data.IdGetter,
        progress,
        readBlock.EstimatedOutputCount,
        Settings.ProgressBatchSize,
        token);

    var mainTrackBlocks = new[] { validateBlock, computeFieldsBlock, writeBlock, throwBlock };

    // NOTE(review): the IsValid predicate presumably decides which items stay on the
    // main track and which go to handleErrorBlock - confirm against the factory.
    return _railroadPipelineFactory.Create(
        readBlock,
        mainTrackBlocks,
        handleErrorBlock,
        progressBlock,
        data => data.IsValid,
        cancellationSource);
}
/// <summary>
/// Assembles a straight pipeline over <see cref="DataBatch"/> items in which validation,
/// field computation and the extra time-consuming work are fused into one processing
/// block, followed by write, progress-reporting and pool-return blocks.
/// </summary>
/// <param name="peopleJsonFilePath">Path passed to the reading block factory as the input file.</param>
/// <param name="targetFilePath">Path passed to the writing block factory as the output file.</param>
/// <param name="progress">Progress sink handed to the progress-reporting block.</param>
/// <param name="cancellationSource">
/// Source whose token is given to every block; the source itself is also passed to the
/// pipeline factory.
/// </param>
/// <returns>The block produced by the straight pipeline factory.</returns>
public StartableBlock<DataBatch> Create(string peopleJsonFilePath,
                                        string targetFilePath,
                                        IProgress<PipelineProgress> progress,
                                        CancellationTokenSource cancellationSource)
{
    var dataPool = new DataPool();
    var token = cancellationSource.Token;

    // Source block: reads the input file, drawing its items from the pool.
    var readBlock = _readingBlockFactory.Create(peopleJsonFilePath, dataPool, token);

    var processBlock = new ProcessingBlock<DataBatch>(
        DataflowFacade.TransformBlock(
            "Process",
            DataBatch.IdGetter,
            batch =>
            {
                // Two separate passes: the validity filter is re-applied after
                // validation has run over the batch.
                batch.Data.Where(person => person.IsValid).ForEach(_personValidator.Validate);
                batch.Data.Where(person => person.IsValid).ForEach(_personFieldsComputer.Compute);

                // One time-consuming operation per item (valid or not) for each
                // configured extra processing round.
                for (var round = 0; round < Settings.ExtraProcessingBlocksCount; round++)
                {
                    batch.Data.ForEach(_ => ComputationsHelper.PerformTimeConsumingOperation());
                }
            },
            token,
            Settings.DegreeOfParallelism));

    var writeBlock = _writingBlockFactory.Create(targetFilePath, token);

    var progressBlock = _progressReportingBlockFactory.Create(
        "ReportProgress",
        DataBatch.IdGetter,
        progress,
        readBlock.EstimatedOutputCount,
        1,
        token);

    // Last stage: every item of the batch is returned to the pool.
    var disposeBlock = new ProcessingBlock<DataBatch>(
        DataflowFacade.TransformBlock(
            "DisposeData",
            DataBatch.IdGetter,
            batch => batch.Data.ForEach(dataPool.Return),
            token));

    var processingBlocks = new[] { processBlock, writeBlock, progressBlock, disposeBlock };

    return _straightPipelineFactory.Create(readBlock, processingBlocks, cancellationSource);
}