private IPropagatorBlock<int, Data> UseLinesReaderAndParser(StreamReader peopleJsonStream, DataPool dataPool, CancellationToken cancellation)
{
    // Create blocks
    // NOTE:
    // - extract the part which must remain single-threaded
    // - allows replacing just the reading (e.g. read from a db instead)
    var readLinesBlock = DataflowFacade.TransformManyBlock<int, Data>(
        "ReadLines",
        x => -1,
        x => _streamLinesReader.Read(peopleJsonStream, x)
                               .Select(line =>
                                   {
                                       var data = dataPool.Rent();
                                       data.PersonJson = line;
                                       return data;
                                   }),
        cancellation);

    // NOTE: can be multi-threaded
    var parseDataBlock = DataflowFacade.TransformBlock(
        "ParseData",
        Data.IdGetter,
        x => _dataParser.Parse(x),
        cancellation);

    // Link blocks
    readLinesBlock.LinkWithCompletion(parseDataBlock);

    return DataflowBlock.Encapsulate(readLinesBlock, parseDataBlock);
}
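For context, a minimal sketch of the DataflowFacade helper these factories lean on. This is an assumption, not the repo's actual code: it presumes the facade wraps TPL Dataflow blocks with shared options (cancellation, degree of parallelism) and an id-based tracing hook; the Trace call and the exact overload set are illustrative only.

using System;
using System.Diagnostics;
using System.Threading;
using System.Threading.Tasks.Dataflow;

public static class DataflowFacade
{
    // Only the Action-based overload is sketched; the factories above also use
    // Func-returning variants, TransformManyBlock and BufferBlock, which would
    // follow the same pattern.
    public static TransformBlock<TData, TData> TransformBlock<TData>(
        string name,
        Func<TData, int> dataIdGetter,
        Action<TData> process,
        CancellationToken cancellation,
        int maxDegreeOfParallelism = 1)
    {
        return new TransformBlock<TData, TData>(
            x =>
                {
                    // Hypothetical per-item tracing keyed by the data id.
                    Trace.WriteLine($"{name}: {dataIdGetter(x)}");
                    process(x);
                    return x;
                },
            new ExecutionDataflowBlockOptions
                {
                    CancellationToken = cancellation,
                    MaxDegreeOfParallelism = maxDegreeOfParallelism
                });
    }
}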
public ProcessingBlock<TData> Create<TData>(string name, Func<TData, int> dataIdGetter, IProgress<PipelineProgress> progress, Lazy<int> estimatedInputCount, int inputPerReport, CancellationToken cancellation)
{
    var state = new State();

    // Create blocks
    var reportBlock = DataflowFacade.TransformBlock(
        name,
        dataIdGetter,
        x => TryReport(state, estimatedInputCount.Value, inputPerReport, progress),
        cancellation);

    // Handle completion: if the block finished cleanly but 100% was never reported, report it now
    var completion = reportBlock.Completion.ContinueWithStatusPropagation(
        x =>
            {
                if (!x.IsFaulted && !x.IsCanceled && !state.Reported100)
                {
                    progress.Report(new PipelineProgress { Percentage = 100 });
                }
            });

    return new ProcessingBlock<TData>(reportBlock, completion);
}
public ProcessingBlock<DataBatch> Create(string targetFilePath, CancellationToken cancellation)
{
    var targetDirectoryPath = Path.GetDirectoryName(targetFilePath);

    if (!Directory.Exists(targetDirectoryPath))
    {
        Directory.CreateDirectory(targetDirectoryPath);
    }

    var writer = new StreamWriter(targetFilePath);

    // Create blocks
    var writeBlock = DataflowFacade.TransformBlock(
        "WriteData",
        DataBatch.IdGetter,
        x => x.Data.ForEach(item => _dataWriter.Write(writer, item)),
        cancellation);

    // Handle completion
    var completion = writeBlock.Completion.ContinueWithStatusPropagation(_ => writer.Dispose());

    return new ProcessingBlock<DataBatch>
        {
            Processor = writeBlock,
            Completion = completion
        };
}
private StartableBlock<DataBatch<Person>> CreatePeoplePipeline(string targetFilePath, PeopleSummary builtSummary, IProgress<PipelineProgress> progress, CancellationToken cancellation)
{
    var cancellationSource = CancellationTokenSource.CreateLinkedTokenSource(cancellation);

    // Create blocks
    var readBlock = _readPeopleBlockFactory.Create(cancellationSource.Token);
    var writeBlock = _writeCsvBlockFactory.Create<Person>(targetFilePath, true, cancellationSource.Token);
    var buildSummaryBlock = new ProcessingBlock<DataBatch<Person>>(
        DataflowFacade.TransformBlock(
            "BuildSummary",
            DataBatch<Person>.IdGetter,
            x => x.Data.ForEach(person => _peopleSummaryBuilder.Include(person, builtSummary)),
            cancellationSource.Token));
    var progressBlock = _progressReportingBlockFactory.Create(
        "PersonProgress",
        DataBatch<Person>.IdGetter,
        progress,
        readBlock.EstimatedOutputCount,
        1,
        cancellationSource.Token);

    // Create pipeline
    var pipeline = _straightPipelineFactory.Create(
        readBlock,
        new[] { writeBlock, buildSummaryBlock, progressBlock },
        cancellationSource);

    pipeline.ContinueCompletionWith(_ => cancellationSource.Dispose());

    return pipeline;
}
private IEnumerable<ProcessingBlock<DataBatch>> CreateExtraProcessingBlocks(CancellationTokenSource cancellationSource)
{
    return Enumerable.Range(1, Settings.ExtraProcessingBlocksCount)
                     .Select(x => new ProcessingBlock<DataBatch>(
                                 DataflowFacade.TransformBlock(
                                     $"ExtraProcessing {x}",
                                     DataBatch.IdGetter,
                                     batch => batch.Data.ForEach(_ => ComputationsHelper.PerformTimeConsumingOperation()),
                                     cancellationSource.Token,
                                     Settings.DegreeOfParallelism)));
}
private IPropagatorBlock<DataBatch, DataBatch> UseDataReader(StreamReader peopleJsonStream, DataPool dataPool, CancellationToken cancellation)
{
    return DataflowFacade.TransformBlock<DataBatch, DataBatch>(
        "ReadData",
        DataBatch.IdGetter,
        x =>
            {
                x.Data = _dataReader.Read(peopleJsonStream, x.IntendedSize, dataPool).ToList();
                return x;
            },
        cancellation);
}
public static ProcessingBlock<TData> Create(string name, Func<TData, int> dataIdGetter, Func<TData, TData> process, CancellationToken cancellation, int maxDegreeOfParallelism = 1)
{
    var processor = DataflowFacade.TransformBlock<TData>(name, dataIdGetter, process, cancellation, maxDegreeOfParallelism);

    return new ProcessingBlock<TData>
        {
            Processor = processor,
            Completion = processor.Completion
        };
}
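The ProcessingBlock<TData> type itself is not shown in this section; the sketch below infers a plausible shape from how it is constructed above (bare processor, processor plus completion, and object-initializer forms all appear). It is an assumption, not the repo's definition.

using System.Threading.Tasks;
using System.Threading.Tasks.Dataflow;

public class ProcessingBlock<TData>
{
    public ProcessingBlock() { }

    public ProcessingBlock(IPropagatorBlock<TData, TData> processor)
        : this(processor, processor.Completion) { }

    public ProcessingBlock(IPropagatorBlock<TData, TData> processor, Task completion)
    {
        Processor = processor;
        Completion = completion;
    }

    public IPropagatorBlock<TData, TData> Processor { get; set; }

    // Completion can differ from Processor.Completion when cleanup
    // (e.g. disposing a writer) is chained onto it, as in the factories above.
    public Task Completion { get; set; }
}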
public ProcessingBlock<TData> Create<TData>(Func<TData, int> dataIdGetter, CancellationToken cancellation)
{
    // Create blocks
    var throwBlock = DataflowFacade.TransformBlock(
        "Throw",
        dataIdGetter,
        x => { throw new InvalidOperationException(); },
        cancellation);

    return new ProcessingBlock<TData>(throwBlock);
}
public ProcessingBlock<DataBatch<TItem>> Create<TItem>(string targetFilePath, bool appendTargetFile, CancellationToken cancellation)
{
    var csvWriter = new Lazy<CsvWriter>(() => CreateCsvWriter(targetFilePath, appendTargetFile));

    // Create blocks
    var writeBlock = DataflowFacade.TransformBlock(
        $"Write{typeof(TItem).Name}",
        DataBatch<TItem>.IdGetter,
        x => csvWriter.Value.WriteRecords(x.Data),
        cancellation);

    // Handle completion
    var completion = writeBlock.Completion.ContinueWithStatusPropagation(_ => csvWriter.ValueIfCreated()?.Dispose());

    return new ProcessingBlock<DataBatch<TItem>>(writeBlock, completion);
}
public StartableBlock<DataBatch<Person>> Create(CancellationToken cancellation)
{
    var batchSize = Settings.ReadingBatchSize;
    var batchesCount = new Lazy<int>(() => GetBatchesCount(batchSize));

    // Create blocks
    var bufferBlock = DataflowFacade.BufferBlock<DataBatch<Person>>(cancellation);
    var readBlock = DataflowFacade.TransformBlock<DataBatch<Person>>(
        "ReadPerson",
        DataBatch<Person>.IdGetter,
        x => x.Data = _peopleBatchReader.Read(x.DataOffset, x.IntendedSize),
        cancellation);

    // Link blocks
    bufferBlock.LinkWithCompletion(readBlock);

    return new StartableBlock<DataBatch<Person>>(
        () => Start(batchSize, batchesCount.Value, bufferBlock),
        readBlock,
        batchesCount);
}
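The Start helper referenced above is not shown in this section. A minimal sketch of what it might do, assuming it seeds the buffer with one batch descriptor per batch and then signals completion (the batch-numbering scheme is a guess):

private void Start(int batchSize, int batchesCount, ITargetBlock<DataBatch<Person>> bufferBlock)
{
    for (var i = 0; i < batchesCount; i++)
    {
        bufferBlock.Post(new DataBatch<Person>
            {
                Number = i + 1,
                DataOffset = i * batchSize,
                IntendedSize = batchSize
            });
    }

    bufferBlock.Complete();
}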
public StartableBlock<Data> Create(string peopleJsonFilePath, string targetFilePath, string errorsFilePath, IProgress<PipelineProgress> progress, CancellationTokenSource cancellationSource)
{
    var dataPool = new DataPool();

    // Create blocks
    // TODO: Progress reporting approach 1: before anything
    var readBlock = _readingBlockFactory.Create(peopleJsonFilePath, dataPool, cancellationSource.Token);
    var validateBlock = new ProcessingBlock<Data>(
        DataflowFacade.TransformBlock(
            "Validate",
            Data.IdGetter,
            x => _personValidator.Validate(x),
            cancellationSource.Token,
            Settings.DegreeOfParallelism));
    var computeFieldsBlock = new ProcessingBlock<Data>(
        DataflowFacade.TransformBlock(
            "ComputeFields",
            Data.IdGetter,
            x => _personFieldsComputer.Compute(x),
            cancellationSource.Token,
            Settings.DegreeOfParallelism));
    var writeBlock = _writingBlockFactory.Create(targetFilePath, cancellationSource.Token);
    var throwBlock = Settings.ThrowTest
                         ? _throwingBlockFactory.Create(Data.IdGetter, cancellationSource.Token)
                         : _emptyBlockFactory.Create<Data>(cancellationSource.Token);
    var handleErrorBlock = _writingBlockFactory.Create(errorsFilePath, cancellationSource.Token);
    var progressBlock = _progressReportingBlockFactory.Create(
        "ReportProgress",
        Data.IdGetter,
        progress,
        readBlock.EstimatedOutputCount,
        Settings.ProgressBatchSize,
        cancellationSource.Token);

    return _railroadPipelineFactory.Create(
        readBlock,
        new[] { validateBlock, computeFieldsBlock, writeBlock, throwBlock },
        handleErrorBlock,
        progressBlock,
        x => x.IsValid,
        cancellationSource);
}
public StartableBlock<DataBatch> Create(string peopleJsonFilePath, string targetFilePath, IProgress<PipelineProgress> progress, CancellationTokenSource cancellationSource)
{
    var dataPool = new DataPool();

    // Create blocks
    var readBlock = _readingBlockFactory.Create(peopleJsonFilePath, dataPool, cancellationSource.Token);
    var processBlock = new ProcessingBlock<DataBatch>(
        DataflowFacade.TransformBlock(
            "Process",
            DataBatch.IdGetter,
            x =>
                {
                    x.Data.Where(item => item.IsValid).ForEach(_personValidator.Validate);
                    x.Data.Where(item => item.IsValid).ForEach(_personFieldsComputer.Compute);

                    // Simulate the extra processing stages inline, within this single block
                    for (var i = 0; i < Settings.ExtraProcessingBlocksCount; i++)
                    {
                        x.Data.ForEach(_ => ComputationsHelper.PerformTimeConsumingOperation());
                    }
                },
            cancellationSource.Token,
            Settings.DegreeOfParallelism));
    var writeBlock = _writingBlockFactory.Create(targetFilePath, cancellationSource.Token);
    var progressBlock = _progressReportingBlockFactory.Create(
        "ReportProgress",
        DataBatch.IdGetter,
        progress,
        readBlock.EstimatedOutputCount,
        1,
        cancellationSource.Token);
    var disposeBlock = new ProcessingBlock<DataBatch>(
        DataflowFacade.TransformBlock(
            "DisposeData",
            DataBatch.IdGetter,
            x => x.Data.ForEach(dataPool.Return),
            cancellationSource.Token));

    return _straightPipelineFactory.Create(
        readBlock,
        new[] { processBlock, writeBlock, progressBlock, disposeBlock },
        cancellationSource);
}
public StartableBlock<DataBatch<PeopleSummary>> Create(CancellationToken cancellation)
{
    var readBlock = DataflowFacade.TransformBlock<DataBatch<PeopleSummary>>(
        "ReadPeopleSummary",
        DataBatch<PeopleSummary>.IdGetter,
        x => x.Data = new[] { _peopleSummaryReader.Read() },
        cancellation);

    return new StartableBlock<DataBatch<PeopleSummary>>(
        () =>
            {
                readBlock.Post(new DataBatch<PeopleSummary> { Number = -1, DataOffset = 0, IntendedSize = 1 });
                readBlock.Complete();
            },
        readBlock,
        1);
}
public StartableBlock<DataBatch> Create(string peopleJsonFilePath, string targetFilePath, IProgress<PipelineProgress> progress, CancellationTokenSource cancellationSource)
{
    var dataPool = new DataPool();

    // Create blocks
    var readBlock = _readingBlockFactory.Create(peopleJsonFilePath, dataPool, cancellationSource.Token);
    var validateBlock = new ProcessingBlock<DataBatch>(
        DataflowFacade.TransformBlock(
            "Validate",
            DataBatch.IdGetter,
            x => x.Data.Where(item => item.IsValid).ForEach(_personValidator.Validate),
            cancellationSource.Token,
            Settings.DegreeOfParallelism));
    var computeFieldsBlock = new ProcessingBlock<DataBatch>(
        DataflowFacade.TransformBlock(
            "ComputeFields",
            DataBatch.IdGetter,
            x => x.Data.Where(item => item.IsValid).ForEach(_personFieldsComputer.Compute),
            cancellationSource.Token,
            Settings.DegreeOfParallelism));
    var extraProcessingBlocks = CreateExtraProcessingBlocks(cancellationSource);
    var writeBlock = _writingBlockFactory.Create(targetFilePath, cancellationSource.Token);
    var progressBlock = _progressReportingBlockFactory.Create(
        "ReportProgress",
        DataBatch.IdGetter,
        progress,
        readBlock.EstimatedOutputCount,
        1,
        cancellationSource.Token);
    var disposeBlock = new ProcessingBlock<DataBatch>(
        DataflowFacade.TransformBlock(
            "DisposeData",
            DataBatch.IdGetter,
            x => x.Data.ForEach(dataPool.Return),
            cancellationSource.Token));

    return _straightPipelineFactory.Create(
        readBlock,
        new[] { validateBlock, computeFieldsBlock }.Concat(extraProcessingBlocks)
                                                   .Concat(new[] { writeBlock, progressBlock, disposeBlock })
                                                   .ToArray(),
        cancellationSource);
}
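A hypothetical caller tying the pieces together, assuming StartableBlock exposes a Start() method and a Completion task (names inferred from the constructor usage above, not confirmed by this section):

var cancellationSource = new CancellationTokenSource();
var progress = new Progress<PipelineProgress>(p => Console.WriteLine($"{p.Percentage}%"));

var pipeline = pipelineFactory.Create("people.json", "people.csv", progress, cancellationSource);

pipeline.Start();
await pipeline.Completion;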