示例#1
0
        /// <summary>
        /// Builds a two-stage propagator: a "ReadLines" block that reads lines from the stream and wraps each
        /// one in a pooled <c>Data</c> instance, linked to a "ParseData" block that runs the parser on every item.
        /// </summary>
        /// <param name="peopleJsonStream">Reader handed to the lines reader on every request.</param>
        /// <param name="dataPool">Pool each emitted <c>Data</c> instance is rented from.</param>
        /// <param name="cancellation">Token passed to both blocks.</param>
        /// <returns>The two linked blocks encapsulated as one propagator (input: reading, output: parsing).</returns>
        private IPropagatorBlock<int, Data> UseLinesReaderAndParser(StreamReader peopleJsonStream, DataPool dataPool, CancellationToken cancellation)
        {
            // Reading stage. Kept separate from parsing because (per the original author's note) this is
            // the part that must stay single-threaded, and it lets the reading be swapped out on its own
            // (e.g. for a database source).
            var linesBlock = DataflowFacade.TransformManyBlock<int, Data>(
                "ReadLines",
                amount => -1, // NOTE(review): sits in the same position as the id getters elsewhere - confirm
                amount => _streamLinesReader
                    .Read(peopleJsonStream, amount)
                    .Select(rawLine =>
                    {
                        // Only the raw JSON text is stored here; parsing happens in the next block.
                        var rented = dataPool.Rent();
                        rented.PersonJson = rawLine;

                        return rented;
                    }),
                cancellation);

            // Parsing stage. The original author notes this one can run multi-threaded.
            var parserBlock = DataflowFacade.TransformBlock(
                "ParseData",
                Data.IdGetter,
                item => _dataParser.Parse(item),
                cancellation);

            linesBlock.LinkWithCompletion(parserBlock);

            return DataflowBlock.Encapsulate(linesBlock, parserBlock);
        }
示例#2
0
        /// <summary>
        /// Creates the people pipeline: a read block followed by a CSV write block, a "BuildSummary" block
        /// that includes every person in <paramref name="builtSummary"/>, and a "PersonProgress" block.
        /// </summary>
        /// <param name="targetFilePath">Path passed to the CSV write block factory.</param>
        /// <param name="builtSummary">Summary instance that every processed person is included in.</param>
        /// <param name="progress">Receiver handed to the progress reporting block.</param>
        /// <param name="cancellation">Outer token; a linked source is derived from it for the whole pipeline.</param>
        /// <returns>The assembled pipeline. The linked cancellation source is disposed when it completes.</returns>
        private StartableBlock<DataBatch<Person>> CreatePeoplePipeline(string targetFilePath, PeopleSummary builtSummary, IProgress<PipelineProgress> progress, CancellationToken cancellation)
        {
            var cancellationSource = CancellationTokenSource.CreateLinkedTokenSource(cancellation);

            try
            {
                // Create blocks
                var readBlock         = _readPeopleBlockFactory.Create(cancellationSource.Token);
                var writeBlock        = _writeCsvBlockFactory.Create<Person>(targetFilePath, true, cancellationSource.Token);
                var buildSummaryBlock = new ProcessingBlock<DataBatch<Person>>(DataflowFacade.TransformBlock(
                    "BuildSummary",
                    DataBatch<Person>.IdGetter,
                    x => x.Data.ForEach(person => _peopleSummaryBuilder.Include(person, builtSummary)),
                    cancellationSource.Token));
                var progressBlock = _progressReportingBlockFactory.Create(
                    "PersonProgress",
                    DataBatch<Person>.IdGetter,
                    progress,
                    readBlock.EstimatedOutputCount,
                    1,
                    cancellationSource.Token);

                // Create pipeline
                var pipeline = _straightPipelineFactory.Create(
                    readBlock,
                    new[] { writeBlock, buildSummaryBlock, progressBlock },
                    cancellationSource);

                // Normal path: the linked source lives as long as the pipeline does.
                pipeline.ContinueCompletionWith(_ => cancellationSource.Dispose());

                return pipeline;
            }
            catch
            {
                // Construction failed before the dispose continuation could take ownership; without this
                // the linked source (and its registration on the outer token) would never be released.
                cancellationSource.Dispose();
                throw;
            }
        }
示例#3
0
        /// <summary>
        /// Creates a processing block that performs no work of its own: it wraps a plain buffer block.
        /// </summary>
        /// <typeparam name="TData">Type of the items flowing through the block.</typeparam>
        /// <param name="cancellation">Token passed to the underlying buffer block.</param>
        /// <returns>A processing block built around the buffer block.</returns>
        public ProcessingBlock<TData> Create<TData>(CancellationToken cancellation)
        {
            // NOTE(review): the literal 1 is presumably the buffer's bounded capacity - confirm against DataflowFacade.
            return new ProcessingBlock<TData>(DataflowFacade.BufferBlock<TData>(cancellation, 1));
        }
示例#4
0
        /// <summary>
        /// Creates a block that calls <c>TryReport</c> for every item passing through and, once the block
        /// has finished without fault or cancellation, reports 100% if that has not been reported yet.
        /// </summary>
        /// <typeparam name="TData">Type of the items flowing through the block.</typeparam>
        /// <param name="name">Name given to the underlying transform block.</param>
        /// <param name="dataIdGetter">Id getter passed to the underlying transform block.</param>
        /// <param name="progress">Receiver of the progress reports.</param>
        /// <param name="estimatedInputCount">Lazily evaluated input estimate; read each time an item is handled.</param>
        /// <param name="inputPerReport">Forwarded to <c>TryReport</c> unchanged.</param>
        /// <param name="cancellation">Token passed to the underlying transform block.</param>
        /// <returns>A processing block whose completion includes the final 100% report.</returns>
        public ProcessingBlock<TData> Create<TData>(string name,
                                                    Func<TData, int> dataIdGetter,
                                                    IProgress<PipelineProgress> progress,
                                                    Lazy<int> estimatedInputCount,
                                                    int inputPerReport,
                                                    CancellationToken cancellation)
        {
            // Shared between the per-item reporting and the completion handler below.
            var reportingState = new State();

            var reportBlock = DataflowFacade.TransformBlock(
                name,
                dataIdGetter,
                item => TryReport(reportingState, estimatedInputCount.Value, inputPerReport, progress),
                cancellation);

            var completion = reportBlock.Completion.ContinueWithStatusPropagation(finished =>
            {
                // Nothing to add after a failure or cancellation, or when 100% already went out.
                if (finished.IsFaulted || finished.IsCanceled || reportingState.Reported100)
                {
                    return;
                }

                progress.Report(new PipelineProgress { Percentage = 100 });
            });

            return new ProcessingBlock<TData>(reportBlock, completion);
        }
示例#5
0
        /// <summary>
        /// Creates the startable reading block for a people JSON file. Starting it posts the batch size
        /// once per batch into a buffer block that feeds the read block, then completes the buffer.
        /// </summary>
        /// <param name="peopleJsonFilePath">File that is line-counted up front and then opened for reading.</param>
        /// <param name="dataPool">Pool passed on to the read block.</param>
        /// <param name="cancellation">Token passed to every created block.</param>
        /// <returns>
        /// A startable block exposing the read block, the counted number of people and a completion
        /// that disposes the file stream once reading has finished.
        /// </returns>
        public StartableBlock<Data> Create(string peopleJsonFilePath, DataPool dataPool, CancellationToken cancellation)
        {
            var batchSize    = Settings.ReadingBatchSize;
            var peopleCount  = _fileLinesCounter.Count(peopleJsonFilePath);
            var batchesCount = peopleCount.CeilingOfDivisionBy(batchSize);

            var peopleJsonStream = File.OpenText(peopleJsonFilePath);

            try
            {
                // Create blocks
                var bufferBlock = DataflowFacade.BufferBlock<int>(cancellation);
                var readBlock   = Settings.SplitReadingIntoTwoSteps
                                    ? UseLinesReaderAndParser(peopleJsonStream, dataPool, cancellation)
                                    : UseDataReader(peopleJsonStream, dataPool, cancellation);

                // Link blocks
                bufferBlock.LinkWithCompletion(readBlock);

                // Handle completion: from here on the stream is released when the read block finishes.
                var completion = readBlock.Completion.ContinueWithStatusPropagation(_ => peopleJsonStream.Dispose());

                return new StartableBlock<Data>(
                    () =>
                    {
                        // One request per batch; each request carries the batch size.
                        for (var i = 0; i < batchesCount; i++)
                        {
                            bufferBlock.Post(batchSize);
                        }

                        bufferBlock.Complete();
                    },
                    readBlock,
                    peopleCount,
                    completion);
            }
            catch
            {
                // Block construction failed, so no completion will ever dispose the stream; close it
                // here to avoid leaking the open file handle.
                peopleJsonStream.Dispose();
                throw;
            }
        }
示例#6
0
        /// <summary>
        /// Creates a "WriteData" block that writes every item of each batch to <paramref name="targetFilePath"/>.
        /// The target directory is created when missing, and the writer is disposed when the block completes.
        /// </summary>
        /// <param name="targetFilePath">
        /// File to write to. May be a bare file name without a directory part, in which case no directory is created.
        /// </param>
        /// <param name="cancellation">Token passed to the underlying transform block.</param>
        /// <returns>A processing block whose completion includes disposing the writer.</returns>
        public ProcessingBlock<DataBatch> Create(string targetFilePath, CancellationToken cancellation)
        {
            var targetDirectoryPath = Path.GetDirectoryName(targetFilePath);

            // GetDirectoryName yields an empty string for a bare file name and null for a root path;
            // CreateDirectory throws on both, so only call it when there is a directory to create.
            // CreateDirectory itself does nothing when the directory already exists.
            if (!string.IsNullOrEmpty(targetDirectoryPath))
            {
                Directory.CreateDirectory(targetDirectoryPath);
            }

            var writer = new StreamWriter(targetFilePath);

            try
            {
                // Create blocks
                var writeBlock = DataflowFacade.TransformBlock(
                    "WriteData",
                    DataBatch.IdGetter,
                    x => x.Data.ForEach(item => _dataWriter.Write(writer, item)),
                    cancellation);

                // Handle completion: from here on the writer is released when the block finishes.
                var completion = writeBlock.Completion.ContinueWithStatusPropagation(_ => writer.Dispose());

                return new ProcessingBlock<DataBatch>
                {
                    Processor = writeBlock,
                    Completion = completion
                };
            }
            catch
            {
                // Block construction failed, so no completion will ever dispose the writer; close it
                // here to avoid leaking the open file handle.
                writer.Dispose();
                throw;
            }
        }
示例#7
0
 /// <summary>
 /// Builds a single "ReadData" block that hands each incoming integer, together with the stream and the
 /// pool, to the data reader and emits whatever items the reader returns.
 /// </summary>
 /// <param name="peopleJsonStream">Reader passed to the data reader on every request.</param>
 /// <param name="dataPool">Pool passed to the data reader on every request.</param>
 /// <param name="cancellation">Token passed to the block.</param>
 /// <returns>The created block.</returns>
 private IPropagatorBlock<int, Data> UseDataReader(StreamReader peopleJsonStream, DataPool dataPool, CancellationToken cancellation)
 {
     var readDataBlock = DataflowFacade.TransformManyBlock<int, Data>(
         "ReadData",
         amount => -1, // NOTE(review): sits in the same position as the id getters elsewhere - confirm
         amount => _dataReader.Read(peopleJsonStream, amount, dataPool),
         cancellation);

     return readDataBlock;
 }
示例#8
0
 /// <summary>
 /// Produces <c>Settings.ExtraProcessingBlocksCount</c> blocks named "ExtraProcessing 1", "ExtraProcessing 2", ...
 /// each of which runs the time-consuming helper operation once per item of every batch.
 /// </summary>
 /// <param name="cancellationSource">Source whose token is passed to every created block.</param>
 /// <returns>
 /// A deferred sequence: the blocks are created when the sequence is enumerated, and enumerating it
 /// again creates a fresh set of blocks.
 /// </returns>
 private IEnumerable<ProcessingBlock<DataBatch>> CreateExtraProcessingBlocks(CancellationTokenSource cancellationSource)
 {
     return from number in Enumerable.Range(1, Settings.ExtraProcessingBlocksCount)
            select new ProcessingBlock<DataBatch>(
                DataflowFacade.TransformBlock(
                    $"ExtraProcessing {number}",
                    DataBatch.IdGetter,
                    batch => batch.Data.ForEach(_ => ComputationsHelper.PerformTimeConsumingOperation()),
                    cancellationSource.Token,
                    Settings.DegreeOfParallelism));
 }
示例#9
0
        /// <summary>
        /// Creates a processing block that performs no work of its own: both its processor and its
        /// completion come from a plain buffer block.
        /// </summary>
        /// <typeparam name="TData">Type of the items flowing through the block.</typeparam>
        /// <param name="cancellation">Token passed to the underlying buffer block.</param>
        /// <returns>A processing block built around the buffer block.</returns>
        public ProcessingBlock<TData> Create<TData>(CancellationToken cancellation)
        {
            // NOTE(review): the literal 1 is presumably the buffer's bounded capacity - confirm against DataflowFacade.
            var passThroughBuffer = DataflowFacade.BufferBlock<TData>(cancellation, 1);

            return new ProcessingBlock<TData>
            {
                Processor = passThroughBuffer,
                Completion = passThroughBuffer.Completion
            };
        }
示例#10
0
 /// <summary>
 /// Builds a "ReadData" block that fills each incoming batch: it asks the data reader for
 /// <c>IntendedSize</c> items, stores them as a list in <c>Data</c> and passes the same batch on.
 /// </summary>
 /// <param name="peopleJsonStream">Reader passed to the data reader for every batch.</param>
 /// <param name="dataPool">Pool passed to the data reader for every batch.</param>
 /// <param name="cancellation">Token passed to the block.</param>
 /// <returns>The created block.</returns>
 private IPropagatorBlock<DataBatch, DataBatch> UseDataReader(StreamReader peopleJsonStream, DataPool dataPool, CancellationToken cancellation)
 {
     return DataflowFacade.TransformBlock<DataBatch, DataBatch>(
         "ReadData",
         DataBatch.IdGetter,
         batch =>
         {
             // Materialize right away so the stream is read inside this block.
             var items = _dataReader.Read(peopleJsonStream, batch.IntendedSize, dataPool).ToList();
             batch.Data = items;

             return batch;
         },
         cancellation);
 }
示例#11
0
        /// <summary>
        /// Creates a startable source that emits exactly one item: starting it invokes
        /// <paramref name="itemGetter"/>, posts the result to a buffer block and completes the buffer.
        /// </summary>
        /// <typeparam name="TOutput">Type of the emitted item.</typeparam>
        /// <param name="itemGetter">Produces the item; invoked on start, not on creation.</param>
        /// <param name="cancellation">Token passed to the buffer block.</param>
        /// <returns>A startable block exposing the buffer, created with 1 as its third argument.</returns>
        public StartableBlock<TOutput> Create<TOutput>(Func<TOutput> itemGetter, CancellationToken cancellation)
        {
            var outputBuffer = DataflowFacade.BufferBlock<TOutput>(cancellation);

            return new StartableBlock<TOutput>(
                () =>
                {
                    outputBuffer.Post(itemGetter());
                    outputBuffer.Complete();
                },
                outputBuffer,
                1);
        }
示例#12
0
        /// <summary>
        /// Creates a "Throw" block whose processing delegate throws
        /// <see cref="InvalidOperationException"/> for every item it is given.
        /// </summary>
        /// <typeparam name="TData">Type of the items flowing through the block.</typeparam>
        /// <param name="dataIdGetter">Id getter passed to the underlying transform block.</param>
        /// <param name="cancellation">Token passed to the underlying transform block.</param>
        /// <returns>A processing block wrapping the throwing transform block.</returns>
        public ProcessingBlock<TData> Create<TData>(Func<TData, int> dataIdGetter, CancellationToken cancellation)
        {
            return new ProcessingBlock<TData>(DataflowFacade.TransformBlock(
                "Throw",
                dataIdGetter,
                item =>
                {
                    throw new InvalidOperationException();
                },
                cancellation));
        }
示例#13
0
        /// <summary>
        /// Creates a processing block around a transform block built from the given delegate; the
        /// transform block's own completion becomes the processing block's completion.
        /// </summary>
        /// <param name="name">Name given to the transform block.</param>
        /// <param name="dataIdGetter">Id getter passed to the transform block.</param>
        /// <param name="process">Delegate applied to every item.</param>
        /// <param name="cancellation">Token passed to the transform block.</param>
        /// <param name="maxDegreeOfParallelism">Parallelism setting forwarded to the transform block; defaults to 1.</param>
        /// <returns>The created processing block.</returns>
        public static ProcessingBlock<TData> Create(string name,
                                                   Func<TData, int> dataIdGetter,
                                                   Func<TData, TData> process,
                                                   CancellationToken cancellation,
                                                   int maxDegreeOfParallelism = 1)
        {
            var transformBlock = DataflowFacade.TransformBlock<TData>(
                name,
                dataIdGetter,
                process,
                cancellation,
                maxDegreeOfParallelism);

            return new ProcessingBlock<TData>
            {
                Processor = transformBlock,
                Completion = transformBlock.Completion
            };
        }
示例#14
0
        /// <summary>
        /// Creates a block named "Write" + the item type name that writes the records of every batch
        /// through a CSV writer. The writer is created lazily on first use and, if it was ever created,
        /// disposed when the block completes.
        /// </summary>
        /// <typeparam name="TItem">Type of the records in each batch.</typeparam>
        /// <param name="targetFilePath">Forwarded to <c>CreateCsvWriter</c>.</param>
        /// <param name="appendTargetFile">Forwarded to <c>CreateCsvWriter</c>.</param>
        /// <param name="cancellation">Token passed to the underlying transform block.</param>
        /// <returns>A processing block whose completion includes disposing the writer.</returns>
        public ProcessingBlock<DataBatch<TItem>> Create<TItem>(string targetFilePath, bool appendTargetFile, CancellationToken cancellation)
        {
            // Deferred so that nothing is created unless at least one batch arrives.
            var lazyWriter = new Lazy<CsvWriter>(() => CreateCsvWriter(targetFilePath, appendTargetFile));
            var blockName  = $"Write{typeof(TItem).Name}";

            var writeBlock = DataflowFacade.TransformBlock(
                blockName,
                DataBatch<TItem>.IdGetter,
                batch => lazyWriter.Value.WriteRecords(batch.Data),
                cancellation);

            // Only dispose a writer that actually came into existence.
            var completion = writeBlock.Completion.ContinueWithStatusPropagation(
                _ => lazyWriter.ValueIfCreated()?.Dispose());

            return new ProcessingBlock<DataBatch<TItem>>(writeBlock, completion);
        }
示例#15
0
        /// <summary>
        /// Creates the startable people reader: a buffer block linked to a "ReadPerson" block that fills
        /// each batch's <c>Data</c> from the batch reader using the batch's offset and intended size.
        /// </summary>
        /// <param name="cancellation">Token passed to both blocks.</param>
        /// <returns>
        /// A startable block exposing the read block. The batches count is computed lazily and is shared
        /// between the start action and the value handed to the startable block.
        /// </returns>
        public StartableBlock<DataBatch<Person>> Create(CancellationToken cancellation)
        {
            var batchSize        = Settings.ReadingBatchSize;
            var lazyBatchesCount = new Lazy<int>(() => GetBatchesCount(batchSize));

            var sourceBlock = DataflowFacade.BufferBlock<DataBatch<Person>>(cancellation);
            var readBlock   = DataflowFacade.TransformBlock<DataBatch<Person>>(
                "ReadPerson",
                DataBatch<Person>.IdGetter,
                batch => batch.Data = _peopleBatchReader.Read(batch.DataOffset, batch.IntendedSize),
                cancellation);

            sourceBlock.LinkWithCompletion(readBlock);

            return new StartableBlock<DataBatch<Person>>(
                () => Start(batchSize, lazyBatchesCount.Value, sourceBlock),
                readBlock,
                lazyBatchesCount);
        }
示例#16
0
        /// <summary>
        /// Assembles the railroad pipeline: read, then "Validate", "ComputeFields", write and either a
        /// throwing or an empty block (chosen by <c>Settings.ThrowTest</c>), with a second writing block
        /// for errors and a "ReportProgress" block. Items are routed using their <c>IsValid</c> flag.
        /// </summary>
        /// <param name="peopleJsonFilePath">Input file passed to the reading block factory.</param>
        /// <param name="targetFilePath">File passed to the writing block factory for the main track.</param>
        /// <param name="errorsFilePath">File passed to the writing block factory for the error-handling block.</param>
        /// <param name="progress">Receiver handed to the progress reporting block.</param>
        /// <param name="cancellationSource">Source whose token every block receives; also passed to the pipeline factory.</param>
        /// <returns>The pipeline produced by the railroad pipeline factory.</returns>
        public StartableBlock<Data> Create(string peopleJsonFilePath,
                                           string targetFilePath,
                                           string errorsFilePath,
                                           IProgress<PipelineProgress> progress,
                                           CancellationTokenSource cancellationSource)
        {
            var dataPool = new DataPool();
            var token    = cancellationSource.Token;

            // Blocks are created in the same order as they appear on the track.
            // TODO: Progress reporting approach 1: before anything
            var readBlock = _readingBlockFactory.Create(peopleJsonFilePath, dataPool, token);

            var validateBlock = new ProcessingBlock<Data>(
                DataflowFacade.TransformBlock(
                    "Validate",
                    Data.IdGetter,
                    item => _personValidator.Validate(item),
                    token,
                    Settings.DegreeOfParallelism));

            var computeFieldsBlock = new ProcessingBlock<Data>(
                DataflowFacade.TransformBlock(
                    "ComputeFields",
                    Data.IdGetter,
                    item => _personFieldsComputer.Compute(item),
                    token,
                    Settings.DegreeOfParallelism));

            var writeBlock = _writingBlockFactory.Create(targetFilePath, token);

            // The last main-track block either always throws (test mode) or does nothing.
            var throwBlock = Settings.ThrowTest
                                 ? _throwingBlockFactory.Create(Data.IdGetter, token)
                                 : _emptyBlockFactory.Create<Data>(token);

            var handleErrorBlock = _writingBlockFactory.Create(errorsFilePath, token);

            var progressBlock = _progressReportingBlockFactory.Create(
                "ReportProgress",
                Data.IdGetter,
                progress,
                readBlock.EstimatedOutputCount,
                Settings.ProgressBatchSize,
                token);

            var mainTrack = new[] { validateBlock, computeFieldsBlock, writeBlock, throwBlock };

            return _railroadPipelineFactory.Create(
                readBlock,
                mainTrack,
                handleErrorBlock,
                progressBlock,
                item => item.IsValid,
                cancellationSource);
        }
示例#17
0
        /// <summary>
        /// Creates a startable "ReadPeopleSummary" block. Starting it posts one batch (number -1,
        /// offset 0, intended size 1) and completes the block; processing that batch sets its
        /// <c>Data</c> to a one-element array holding the result of the summary reader.
        /// </summary>
        /// <param name="cancellation">Token passed to the read block.</param>
        /// <returns>A startable block exposing the read block, created with 1 as its third argument.</returns>
        public StartableBlock<DataBatch<PeopleSummary>> Create(CancellationToken cancellation)
        {
            var readBlock = DataflowFacade.TransformBlock<DataBatch<PeopleSummary>>(
                "ReadPeopleSummary",
                DataBatch<PeopleSummary>.IdGetter,
                batch => batch.Data = new[] { _peopleSummaryReader.Read() },
                cancellation);

            return new StartableBlock<DataBatch<PeopleSummary>>(
                () =>
                {
                    var singleBatch = new DataBatch<PeopleSummary>
                    {
                        Number = -1,
                        DataOffset = 0,
                        IntendedSize = 1
                    };

                    readBlock.Post(singleBatch);
                    readBlock.Complete();
                },
                readBlock,
                1);
        }
示例#18
0
        /// <summary>
        /// Assembles a straight pipeline in which validation, field computation and the extra
        /// time-consuming work all happen inside one "Process" block, followed by write,
        /// "ReportProgress" and "DisposeData" blocks.
        /// </summary>
        /// <param name="peopleJsonFilePath">Input file passed to the reading block factory.</param>
        /// <param name="targetFilePath">File passed to the writing block factory.</param>
        /// <param name="progress">Receiver handed to the progress reporting block.</param>
        /// <param name="cancellationSource">Source whose token every block receives; also passed to the pipeline factory.</param>
        /// <returns>The pipeline produced by the straight pipeline factory.</returns>
        public StartableBlock<DataBatch> Create(string peopleJsonFilePath,
                                                string targetFilePath,
                                                IProgress<PipelineProgress> progress,
                                                CancellationTokenSource cancellationSource)
        {
            var dataPool = new DataPool();
            var token    = cancellationSource.Token;

            var readBlock = _readingBlockFactory.Create(peopleJsonFilePath, dataPool, token);

            var processBlock = new ProcessingBlock<DataBatch>(
                DataflowFacade.TransformBlock(
                    "Process",
                    DataBatch.IdGetter,
                    batch =>
                    {
                        // Validity is re-checked before computing fields, so only items that are
                        // still valid after validation get computed.
                        batch.Data.Where(item => item.IsValid).ForEach(_personValidator.Validate);
                        batch.Data.Where(item => item.IsValid).ForEach(_personFieldsComputer.Compute);

                        // Same work the separate extra-processing blocks would do, done inline.
                        var round = 0;
                        while (round < Settings.ExtraProcessingBlocksCount)
                        {
                            batch.Data.ForEach(_ => ComputationsHelper.PerformTimeConsumingOperation());
                            round++;
                        }
                    },
                    token,
                    Settings.DegreeOfParallelism));

            var writeBlock = _writingBlockFactory.Create(targetFilePath, token);

            var progressBlock = _progressReportingBlockFactory.Create(
                "ReportProgress",
                DataBatch.IdGetter,
                progress,
                readBlock.EstimatedOutputCount,
                1,
                token);

            // Last step: hand every item of the batch back to the pool.
            var disposeBlock = new ProcessingBlock<DataBatch>(
                DataflowFacade.TransformBlock(
                    "DisposeData",
                    DataBatch.IdGetter,
                    batch => batch.Data.ForEach(dataPool.Return),
                    token));

            var processingBlocks = new[] { processBlock, writeBlock, progressBlock, disposeBlock };

            return _straightPipelineFactory.Create(readBlock, processingBlocks, cancellationSource);
        }
示例#19
0
        /// <summary>
        /// Assembles a straight pipeline with one block per step: "Validate", "ComputeFields", the
        /// extra processing blocks, write, "ReportProgress" and finally "DisposeData".
        /// </summary>
        /// <param name="peopleJsonFilePath">Input file passed to the reading block factory.</param>
        /// <param name="targetFilePath">File passed to the writing block factory.</param>
        /// <param name="progress">Receiver handed to the progress reporting block.</param>
        /// <param name="cancellationSource">Source whose token every block receives; also passed to the pipeline factory.</param>
        /// <returns>The pipeline produced by the straight pipeline factory.</returns>
        public StartableBlock<DataBatch> Create(string peopleJsonFilePath,
                                                string targetFilePath,
                                                IProgress<PipelineProgress> progress,
                                                CancellationTokenSource cancellationSource)
        {
            var dataPool = new DataPool();
            var token    = cancellationSource.Token;

            var readBlock = _readingBlockFactory.Create(peopleJsonFilePath, dataPool, token);

            // Both steps skip items that are not (or no longer) valid.
            var validateBlock = new ProcessingBlock<DataBatch>(
                DataflowFacade.TransformBlock(
                    "Validate",
                    DataBatch.IdGetter,
                    batch => batch.Data.Where(item => item.IsValid).ForEach(_personValidator.Validate),
                    token,
                    Settings.DegreeOfParallelism));

            var computeFieldsBlock = new ProcessingBlock<DataBatch>(
                DataflowFacade.TransformBlock(
                    "ComputeFields",
                    DataBatch.IdGetter,
                    batch => batch.Data.Where(item => item.IsValid).ForEach(_personFieldsComputer.Compute),
                    token,
                    Settings.DegreeOfParallelism));

            var extraProcessingBlocks = CreateExtraProcessingBlocks(cancellationSource);
            var writeBlock            = _writingBlockFactory.Create(targetFilePath, token);

            var progressBlock = _progressReportingBlockFactory.Create(
                "ReportProgress",
                DataBatch.IdGetter,
                progress,
                readBlock.EstimatedOutputCount,
                1,
                token);

            // Last step: hand every item of the batch back to the pool.
            var disposeBlock = new ProcessingBlock<DataBatch>(
                DataflowFacade.TransformBlock(
                    "DisposeData",
                    DataBatch.IdGetter,
                    batch => batch.Data.ForEach(dataPool.Return),
                    token));

            // Fixed head and tail around the configurable extra blocks.
            var leadingBlocks  = new[] { validateBlock, computeFieldsBlock };
            var trailingBlocks = new[] { writeBlock, progressBlock, disposeBlock };
            var allBlocks      = leadingBlocks
                                 .Concat(extraProcessingBlocks)
                                 .Concat(trailingBlocks)
                                 .ToArray();

            return _straightPipelineFactory.Create(readBlock, allBlocks, cancellationSource);
        }