Esempio n. 1
0
 /// <summary>
 /// Raises the work-done notification when the supplied block is flagged as the last one.
 /// </summary>
 /// <param name="dataBlock">Block whose <c>IsLastBlock</c> flag is inspected.</param>
 private void CheckWorkDone(ProcessingBlock dataBlock)
 {
     // Any block other than the last one requires no action.
     if (!dataBlock.IsLastBlock)
     {
         return;
     }

     _context.ConsumerEventsManager.OnWorkDone();
 }
Esempio n. 2
0
        /// <summary>
        /// Assembles the people pipeline: a read block followed by a CSV write block,
        /// a summary-building block and a progress-reporting block.
        /// </summary>
        /// <param name="targetFilePath">Path handed to the CSV write block factory.</param>
        /// <param name="builtSummary">Summary instance that every processed person is included into.</param>
        /// <param name="progress">Progress sink handed to the progress-reporting block factory.</param>
        /// <param name="cancellation">Caller's token; a linked source is derived from it for this pipeline.</param>
        /// <returns>The pipeline produced by the straight pipeline factory.</returns>
        private StartableBlock<DataBatch<Person>> CreatePeoplePipeline(string targetFilePath, PeopleSummary builtSummary, IProgress<PipelineProgress> progress, CancellationToken cancellation)
        {
            // The linked source is owned by this pipeline and must be disposed by it.
            var cancellationSource = CancellationTokenSource.CreateLinkedTokenSource(cancellation);

            try
            {
                // Create blocks
                var readBlock         = _readPeopleBlockFactory.Create(cancellationSource.Token);
                var writeBlock        = _writeCsvBlockFactory.Create<Person>(targetFilePath, true, cancellationSource.Token);
                var buildSummaryBlock = new ProcessingBlock<DataBatch<Person>>(DataflowFacade.TransformBlock("BuildSummary",
                                                                                                             DataBatch<Person>.IdGetter,
                                                                                                             x => x.Data.ForEach(person => _peopleSummaryBuilder.Include(person, builtSummary)),
                                                                                                             cancellationSource.Token));
                var progressBlock = _progressReportingBlockFactory.Create("PersonProgress",
                                                                          DataBatch<Person>.IdGetter,
                                                                          progress,
                                                                          readBlock.EstimatedOutputCount,
                                                                          1,
                                                                          cancellationSource.Token);

                // Create pipeline
                var pipeline = _straightPipelineFactory.Create(readBlock,
                                                               new[] { writeBlock, buildSummaryBlock, progressBlock },
                                                               cancellationSource);

                // Normal path: the source is released once the pipeline has completed.
                pipeline.ContinueCompletionWith(_ => cancellationSource.Dispose());

                return pipeline;
            }
            catch
            {
                // Construction failed before the completion continuation could take ownership,
                // so nothing else would ever dispose the linked source; release it here.
                cancellationSource.Dispose();
                throw;
            }
        }
Esempio n. 3
0
        /// <summary>
        /// Builds a straight pipeline over batches: read, validate, compute fields,
        /// optional extra processing, write, report progress, and return data to the pool.
        /// </summary>
        /// <param name="peopleJsonFilePath">Path handed to the reading block factory.</param>
        /// <param name="targetFilePath">Path handed to the writing block factory.</param>
        /// <param name="progress">Progress sink handed to the progress-reporting block factory.</param>
        /// <param name="cancellationSource">Source whose token is given to every block and which is passed to the pipeline factory.</param>
        /// <returns>The pipeline produced by the straight pipeline factory.</returns>
        public StartableBlock<DataBatch> Create(string peopleJsonFilePath,
                                                string targetFilePath,
                                                IProgress<PipelineProgress> progress,
                                                CancellationTokenSource cancellationSource)
        {
            var dataPool = new DataPool();
            var token    = cancellationSource.Token;

            // Source block
            var readBlock = _readingBlockFactory.Create(peopleJsonFilePath, dataPool, token);

            // Only items still flagged valid are validated / have their fields computed.
            var validateBlock = ProcessingBlock<DataBatch>.Create(
                "Validate",
                DataBatch.IdGetter,
                batch => batch.Data.Where(item => item.IsValid).ForEach(_personValidator.Validate),
                token,
                Settings.ProcessInParallel ? Settings.MaxDegreeOfParallelism : 1);

            var computeFieldsBlock = ProcessingBlock<DataBatch>.Create(
                "ComputeFields",
                DataBatch.IdGetter,
                batch => batch.Data.Where(item => item.IsValid).ForEach(_personFieldsComputer.Compute),
                token,
                Settings.ProcessInParallel ? Settings.MaxDegreeOfParallelism : 1);

            var extraProcessingBlocks = CreateExtraProcessingBlocks(cancellationSource);
            var writeBlock            = _writingBlockFactory.Create(targetFilePath, token);
            var progressBlock         = _progressReportingBlockFactory.Create(DataBatch.IdGetter,
                                                                              progress,
                                                                              readBlock.EstimatedOutputCount,
                                                                              1,
                                                                              token);

            // Final stage hands every item of the batch back to the pool.
            var disposeBlock = ProcessingBlock<DataBatch>.Create(
                "DisposeData",
                DataBatch.IdGetter,
                batch => batch.Data.ForEach(dataPool.Return),
                token);

            // Stage order: validate, compute, extras, write, progress, dispose.
            var processingBlocks = new[] { validateBlock, computeFieldsBlock }
                                   .Concat(extraProcessingBlocks)
                                   .Concat(new[] { writeBlock, progressBlock, disposeBlock })
                                   .ToArray();

            return _straightPipelineFactory.Create(readBlock, processingBlocks, cancellationSource);
        }
Esempio n. 4
0
        /// <summary>
        /// Reads the bounds configured for <paramref name="option"/> on <paramref name="filter"/>
        /// and tells whether <paramref name="value"/> lies within them (both ends inclusive).
        /// </summary>
        /// <param name="filter">Block whose <c>Options</c> collection is queried.</param>
        /// <param name="option">Key of the option whose bounds are read.</param>
        /// <param name="value">Value to test.</param>
        /// <param name="min">Receives the option's <c>Min</c>, cast to <see cref="int"/>.</param>
        /// <param name="max">Receives the option's <c>Max</c>, cast to <see cref="int"/>.</param>
        /// <returns><c>true</c> when <c>min &lt;= value &lt;= max</c>; otherwise <c>false</c>.</returns>
        protected static bool CheckMinMax(ProcessingBlock filter, Option option, int value, out int min, out int max)
        {
            var limits = filter.Options[option];

            max = (int)limits.Max;
            min = (int)limits.Min;

            // Out of range means above the maximum or below the minimum.
            return !(value > max || value < min);
        }
Esempio n. 5
0
 /// <summary>
 /// Produces <c>Settings.ExtraProcessingBlocksCount</c> blocks, numbered from 1, each of which
 /// runs <c>ComputationsHelper.PerformTimeConsumingOperation</c> once per item of a batch.
 /// </summary>
 /// <param name="cancellationSource">Source whose token is given to every created block.</param>
 /// <returns>A lazily evaluated sequence; the blocks are created when it is enumerated.</returns>
 private IEnumerable<ProcessingBlock<DataBatch>> CreateExtraProcessingBlocks(CancellationTokenSource cancellationSource)
 {
     var blockNumbers = Enumerable.Range(1, Settings.ExtraProcessingBlocksCount);

     return from blockNumber in blockNumbers
            select ProcessingBlock<DataBatch>.Create(
                $"ExtraProcessing {blockNumber}",
                DataBatch.IdGetter,
                batch => batch.Data.ForEach(_ => ComputationsHelper.PerformTimeConsumingOperation()),
                cancellationSource.Token,
                Settings.ProcessInParallel ? Settings.MaxDegreeOfParallelism : 1);
 }
        /// <summary>
        /// Completes the processing block, records the final crawl statistics on
        /// <c>WebsiteDefinition.CrawlResult</c> and publishes that result through <c>CompletionSource</c>.
        /// </summary>
        public void Complete()
        {
            // Signal completion on the block before the statistics below are read.
            ProcessingBlock.Complete();

            // Stamp the result with the processed-URL count and the end time.
            // NOTE(review): DateTime.Now is local time — confirm whether UTC is expected by consumers.
            WebsiteDefinition.CrawlResult.NumberOfPagesCrawled = WebsiteDefinition.ProcessedUrlsCount;
            WebsiteDefinition.CrawlResult.CrawlEnded           = DateTime.Now;

            // NOTE(review): if CompletionSource is a TaskCompletionSource, SetResult throws when
            // called a second time — confirm Complete() is only invoked once.
            CompletionSource.SetResult(WebsiteDefinition.CrawlResult);
        }
Esempio n. 7
0
        /// <summary>
        /// Wires a "railroad" pipeline: items accepted by <paramref name="validDataPredicate"/> travel
        /// from the source through every processor to <paramref name="output"/>, while items rejected
        /// by the predicate at any stage are diverted to <paramref name="errorHandler"/>, which in turn
        /// forwards to <paramref name="output"/>.
        /// </summary>
        /// <typeparam name="TData">Type of the items flowing through the pipeline.</typeparam>
        /// <param name="source">Block that starts the pipeline and supplies its output.</param>
        /// <param name="processors">Processing stages, linked in array order; must contain at least one element.</param>
        /// <param name="errorHandler">Block that receives items rejected by the predicate.</param>
        /// <param name="output">Final block fed by the last processor and by the error handler.</param>
        /// <param name="validDataPredicate">Decides whether an item continues on the main track.</param>
        /// <param name="cancellationSource">Passed to the global completion helper.</param>
        /// <returns>A block exposing the source's start action, the output block and a global completion.</returns>
        /// <exception cref="ArgumentNullException">
        /// <paramref name="source"/>, <paramref name="processors"/>, <paramref name="errorHandler"/>
        /// or <paramref name="output"/> is <c>null</c>.
        /// </exception>
        /// <exception cref="ArgumentException"><paramref name="processors"/> is empty.</exception>
        public StartableBlock<TData> Create<TData>(StartableBlock<TData> source,
                                                   ProcessingBlock<TData>[] processors,
                                                   ProcessingBlock<TData> errorHandler,
                                                   ProcessingBlock<TData> output,
                                                   Predicate<TData> validDataPredicate,
                                                   CancellationTokenSource cancellationSource)
        {
            // Validate up front so a bad argument cannot leave the blocks half-linked.
            if (source == null)
            {
                throw new ArgumentNullException(nameof(source));
            }

            if (processors == null)
            {
                throw new ArgumentNullException(nameof(processors));
            }

            if (processors.Length == 0)
            {
                throw new ArgumentException("At least one processor is required.", nameof(processors));
            }

            if (errorHandler == null)
            {
                throw new ArgumentNullException(nameof(errorHandler));
            }

            if (output == null)
            {
                throw new ArgumentNullException(nameof(output));
            }

            // The pipeline looks like this:
            // source -> processor1 -> processor2 -> output
            //       \      |          |             ^
            //        \     v          v            /
            //         \--> errorHandler ----------/

            // Link blocks
            source.Output.LinkWithCompletion(processors[0].Processor, validDataPredicate);
            source.Output.LinkTo(errorHandler.Processor, x => !validDataPredicate(x));

            for (var i = 0; i < processors.Length - 1; i++)
            {
                processors[i].Processor.LinkWithCompletion(processors[i + 1].Processor, validDataPredicate);
                processors[i].Processor.LinkTo(errorHandler.Processor, x => !validDataPredicate(x));
            }

            var lastProcessor = processors.Last();

            // The output has two inputs (last processor and error handler), so these are plain
            // links; its completion is derived explicitly below.
            lastProcessor.Processor.LinkTo(output.Processor, validDataPredicate);
            lastProcessor.Processor.LinkTo(errorHandler.Processor, x => !validDataPredicate(x));

            errorHandler.Processor.LinkTo(output.Processor);

            // Propagate completions of multiple inputs
            errorHandler.Processor.DeriveCompletionOrFaultFrom(new[] { source.Output }.Concat(processors.Select(x => x.Processor)));
            output.Processor.DeriveCompletionOrFaultFrom(lastProcessor.Processor, errorHandler.Processor);

            // Create global completion
            var completion = TaskExtensions.CreateGlobalCompletion(new[] { source.Completion }.Concat(processors.Select(x => x.Completion))
                                                                   .Concat(new[] { errorHandler.Completion, output.Completion }),
                                                                   cancellationSource);

            return new StartableBlock<TData>
            {
                Start = source.Start,
                Output = output.Processor,
                EstimatedOutputCount = source.EstimatedOutputCount,
                Completion = completion
            };
        }
Esempio n. 8
0
        /// <summary>
        /// Builds a railroad pipeline over single items: read, validate, compute fields, write and
        /// an optional throwing stage, with a second writer acting as error handler and the
        /// progress-reporting block acting as pipeline output.
        /// </summary>
        /// <param name="peopleJsonFilePath">Path handed to the reading block factory.</param>
        /// <param name="targetFilePath">Path handed to the writing block factory for the main track.</param>
        /// <param name="errorsFilePath">Path handed to the writing block factory for the error handler.</param>
        /// <param name="progress">Progress sink handed to the progress-reporting block factory.</param>
        /// <param name="cancellationSource">Source whose token is given to every block and which is passed to the pipeline factory.</param>
        /// <returns>The pipeline produced by the railroad pipeline factory.</returns>
        public StartableBlock<Data> Create(string peopleJsonFilePath,
                                           string targetFilePath,
                                           string errorsFilePath,
                                           IProgress<PipelineProgress> progress,
                                           CancellationTokenSource cancellationSource)
        {
            var dataPool = new DataPool();
            var token    = cancellationSource.Token;

            // Create blocks
            // TODO: Progress reporting approach 1: before anything
            var readBlock = _readingBlockFactory.Create(peopleJsonFilePath, dataPool, token);

            var validateBlock = new ProcessingBlock<Data>(
                DataflowFacade.TransformBlock("Validate",
                                              Data.IdGetter,
                                              item => _personValidator.Validate(item),
                                              token,
                                              Settings.DegreeOfParallelism));

            var computeFieldsBlock = new ProcessingBlock<Data>(
                DataflowFacade.TransformBlock("ComputeFields",
                                              Data.IdGetter,
                                              item => _personFieldsComputer.Compute(item),
                                              token,
                                              Settings.DegreeOfParallelism));

            var writeBlock = _writingBlockFactory.Create(targetFilePath, token);

            // Settings.ThrowTest selects the throwing block; otherwise an empty block takes its place.
            var throwBlock = Settings.ThrowTest
                                 ? _throwingBlockFactory.Create(Data.IdGetter, token)
                                 : _emptyBlockFactory.Create<Data>(token);

            var handleErrorBlock = _writingBlockFactory.Create(errorsFilePath, token);
            var progressBlock    = _progressReportingBlockFactory.Create("ReportProgress",
                                                                         Data.IdGetter,
                                                                         progress,
                                                                         readBlock.EstimatedOutputCount,
                                                                         Settings.ProgressBatchSize,
                                                                         token);

            var processors = new[] { validateBlock, computeFieldsBlock, writeBlock, throwBlock };

            return _railroadPipelineFactory.Create(readBlock,
                                                   processors,
                                                   handleErrorBlock,
                                                   progressBlock,
                                                   item => item.IsValid,
                                                   cancellationSource);
        }
Esempio n. 9
0
        /// <summary>
        /// Builds a straight pipeline over batches in which validation, field computation and the
        /// extra time-consuming passes all run inside a single "Process" block, followed by
        /// write, progress-reporting and pool-return blocks.
        /// </summary>
        /// <param name="peopleJsonFilePath">Path handed to the reading block factory.</param>
        /// <param name="targetFilePath">Path handed to the writing block factory.</param>
        /// <param name="progress">Progress sink handed to the progress-reporting block factory.</param>
        /// <param name="cancellationSource">Source whose token is given to every block and which is passed to the pipeline factory.</param>
        /// <returns>The pipeline produced by the straight pipeline factory.</returns>
        public StartableBlock<DataBatch> Create(string peopleJsonFilePath,
                                                string targetFilePath,
                                                IProgress<PipelineProgress> progress,
                                                CancellationTokenSource cancellationSource)
        {
            var dataPool = new DataPool();
            var token    = cancellationSource.Token;

            // Create blocks
            var readBlock = _readingBlockFactory.Create(peopleJsonFilePath, dataPool, token);

            var processBlock = new ProcessingBlock<DataBatch>(
                DataflowFacade.TransformBlock(
                    "Process",
                    DataBatch.IdGetter,
                    batch =>
                    {
                        // Validity is re-checked before field computation, after validation has run.
                        batch.Data.Where(item => item.IsValid).ForEach(_personValidator.Validate);
                        batch.Data.Where(item => item.IsValid).ForEach(_personFieldsComputer.Compute);

                        // One time-consuming pass over the whole batch per configured extra block.
                        for (var pass = 0; pass < Settings.ExtraProcessingBlocksCount; pass++)
                        {
                            batch.Data.ForEach(_ => ComputationsHelper.PerformTimeConsumingOperation());
                        }
                    },
                    token,
                    Settings.DegreeOfParallelism));

            var writeBlock    = _writingBlockFactory.Create(targetFilePath, token);
            var progressBlock = _progressReportingBlockFactory.Create("ReportProgress",
                                                                      DataBatch.IdGetter,
                                                                      progress,
                                                                      readBlock.EstimatedOutputCount,
                                                                      1,
                                                                      token);

            // Final stage hands every item of the batch back to the pool.
            var disposeBlock = new ProcessingBlock<DataBatch>(
                DataflowFacade.TransformBlock("DisposeData",
                                              DataBatch.IdGetter,
                                              batch => batch.Data.ForEach(dataPool.Return),
                                              token));

            var stages = new[] { processBlock, writeBlock, progressBlock, disposeBlock };

            return _straightPipelineFactory.Create(readBlock, stages, cancellationSource);
        }
Esempio n. 10
0
 /// <summary>
 /// Supplies a processing block for the given block number, reusing one from the recycling
 /// queue when available and allocating a new one otherwise.
 /// </summary>
 /// <param name="block">Receives the recycled (and reset) or newly created block.</param>
 /// <param name="blockNumber">Number assigned to the returned block.</param>
 private void TryRecyclingBlock(out ProcessingBlock block, int blockNumber)
 {
     if ( _recycling.TryDequeue(out block) ) {
         // Drop the references held in every input slot of the recycled block.
         for ( int i = 0; i < block.InputData.Length; i++ ) {
             block.InputData[i] = null;
         }

         // Reset output and the ready signal exactly once per recycled block. Doing this inside
         // the loop repeated the work per slot and skipped it when there were no input slots.
         block.OutputData.Clear();
         block.Ready.Reset();
     } else {
         block = new ProcessingBlock();
     }
     block.BlockNumber = blockNumber;
 }
 /// <summary>
 /// Posts a URL to the processing block and reports the block's input count afterwards.
 /// </summary>
 /// <param name="crawlUrl">URL to post.</param>
 /// <returns>The value of <c>ProcessingBlock.InputCount</c> read right after posting.</returns>
 public int Post(CrawlUrl crawlUrl)
 {
     // Any value returned by Post is intentionally not inspected here.
     ProcessingBlock.Post(crawlUrl);

     return ProcessingBlock.InputCount;
 }
Esempio n. 12
0
 /// <summary>
 /// Forwards the given stream to the processing block.
 /// </summary>
 /// <param name="stream">Stream to post.</param>
 public void Post(IFaceStream stream)
 {
     // Pure pass-through; any value returned by Post is not inspected.
     ProcessingBlock.Post(stream);
 }