Ejemplo n.º 1
0
        /// <summary>
        /// Runs a single crawler and then asks both database inserters to force an insert.
        /// </summary>
        /// <remarks>
        /// The crawler is handed the highest <c>SubmissionId</c> already stored for its online judge
        /// (<c>null</c> when no submission is stored). Exceptions thrown while the crawler is working,
        /// or while the final checkpoint message is being sent, are logged and not rethrown.
        /// </remarks>
        /// <param name="crawler">The crawler to run; its <c>OnlineJudge</c> selects the stored submissions to look at.</param>
        /// <returns>A task that completes after both force-insert messages have been sent.</returns>
        private async Task StartCrawler(ISubmissionCrawler crawler)
        {
            var oj = crawler.OnlineJudge;

            long? latestSubmissionId;

            // A dedicated scope is created just for this lookup and disposed right after it.
            using (var scope = _serviceProvider.CreateScope())
            {
                var context = scope.ServiceProvider.GetRequiredService<OHuntDbContext>();
                latestSubmissionId = (await context.Submission
                                      .Where(e => e.OnlineJudgeId == oj)
                                      .OrderByDescending(e => e.SubmissionId)
                                      .FirstOrDefaultAsync(_cancel.Token))?.SubmissionId;
            }

            _logger.LogTrace("Work on {0}, latestSubmissionId {1}", oj.ToString(), latestSubmissionId);

            var submissionTransformer = CreateTransformer<Submission>();
            var errorTransformer      = CreateTransformer<CrawlerError>();

            // The links to the shared inserters are removed again when this method exits.
            using var submissionUnlink = submissionTransformer.LinkTo(_submissionInserter);
            using var errorUnlink      = errorTransformer.LinkTo(_errorInserter);

            var propagator = new CrawlerPropagator(submissionTransformer, errorTransformer);

            try
            {
                await crawler.WorkAsync(latestSubmissionId, propagator, _cancel.Token);

                await propagator.SendAsync(new CrawlerMessage
                {
                    Checkpoint = true,
                });
            }
            catch (Exception e)
            {
                // Constant message template + argument: keeps the log entry structured and avoids
                // the crawler name being parsed as part of the template.
                _logger.LogError(e, "Exception when running crawler {OnlineJudge}", oj.ToString());

                // data from last checkpoint is automatically discarded

                // TODO: add entity CrawlerExecuteLog , save the execution time and result of
                // the crawler
            }
            finally
            {
                // Complete the propagator on both the success and the failure path so the
                // Completion tasks awaited below can finish.
                propagator.Complete();
            }

            await propagator.Completion;
            await submissionTransformer.Completion;
            await errorTransformer.Completion;

            // TODO: call this after all crawler finished or after 30 minutes
            await _submissionInserter.SendAsync(DatabaseInserterMessage<Submission>.ForceInsertMessage);

            await _errorInserter.SendAsync(DatabaseInserterMessage<CrawlerError>.ForceInsertMessage);
        }
        /// <summary>
        /// Runs <paramref name="crawler"/> together with the submission and error inserters,
        /// connected through two bounded buffers.
        /// </summary>
        /// <remarks>
        /// The crawler is handed the highest <c>SubmissionId</c> already stored for its online judge
        /// (<c>null</c> when no submission is stored). As soon as the crawler task or one of the
        /// inserter tasks fails, the other tasks are cancelled and awaited; the failure is logged
        /// and not rethrown.
        /// </remarks>
        /// <param name="crawler">The crawler to run; its <c>OnlineJudge</c> selects the stored submissions to look at.</param>
        /// <param name="cancellationToken">Cancels the database lookup and the crawler.</param>
        /// <returns>A task that completes once the crawler and both inserters have finished.</returns>
        public async Task WorkAsync(
            ISubmissionCrawler crawler,
            CancellationToken cancellationToken)
        {
            var oj = crawler.OnlineJudge;

            long? latestSubmissionId;

            // A dedicated scope is created just for this lookup and disposed right after it.
            using (var scope = _serviceProvider.CreateScope())
            {
                var context = scope.ServiceProvider.GetRequiredService<OHuntWebContext>();
                latestSubmissionId = (await context.Submission
                                      .Where(e => e.OnlineJudgeId == oj)
                                      .OrderByDescending(e => e.SubmissionId)
                                      .FirstOrDefaultAsync(cancellationToken: cancellationToken))?.SubmissionId;
            }

            var submissionBuffer
                = new BufferBlock<Submission>(new DataflowBlockOptions
            {
                BoundedCapacity = BufferCapacity,
                EnsureOrdered   = false,
            });
            var errorBuffer
                = new BufferBlock<CrawlerError>(new DataflowBlockOptions
            {
                BoundedCapacity = BufferCapacity,
                EnsureOrdered   = false,
            });

            _logger.LogTrace("Work on {0}, latestSubmissionId {1}", oj.ToString(), latestSubmissionId);

            // The linked source follows the caller's token without leaving a callback registered
            // on it after this method returns; both sources are disposed when the block exits.
            using (var inserterCancel = new CancellationTokenSource())
            using (var crawlerCancel = CancellationTokenSource.CreateLinkedTokenSource(cancellationToken))
            {
                // cancel crawler, it may trigger crawler to submit a Complete
                // or it just throws, the catch below cancels the inserter
                var crawlerTask            = crawler.WorkAsync(latestSubmissionId, submissionBuffer, errorBuffer, crawlerCancel.Token);
                var submissionInserterTask = _submissionInserter.WorkAsync(submissionBuffer, inserterCancel.Token);
                var errorInserterTask      = _errorInserter.WorkAsync(errorBuffer, inserterCancel.Token);

                Task[] pending = { crawlerTask, submissionInserterTask, errorInserterTask };

                try
                {
                    // Observe tasks in completion order, not in a fixed order: a failed inserter
                    // must be noticed while the crawler is still running, otherwise the crawler
                    // can wait forever on a bounded buffer that nobody drains.
                    while (pending.Length > 0)
                    {
                        var finished = await Task.WhenAny(pending);
                        pending = Array.FindAll(pending, t => t != finished);

                        // Rethrows the failure (or cancellation) of the finished task, if any.
                        await finished;
                    }
                }
                catch (Exception e)
                {
                    crawlerCancel.Cancel();
                    inserterCancel.Cancel();
                    _logger.LogError(e, "Exception when crawling");

                    // Wait for the remaining tasks so none of them keeps running, or fails
                    // unobserved, after the token sources above are disposed.
                    // NOTE(review): relies on the crawler and inserters honouring their tokens.
                    try
                    {
                        await Task.WhenAll(pending);
                    }
                    catch (OperationCanceledException)
                    {
                        // Expected: the remaining tasks were cancelled just above.
                    }
                    catch (Exception inner)
                    {
                        _logger.LogError(inner, "Exception when crawling");
                    }
                }
            }
        }