/// <summary>
/// Runs a single crawler and forwards what it produces to the shared
/// submission and error inserters.
/// </summary>
/// <remarks>
/// The largest stored <c>SubmissionId</c> for the crawler's online judge is
/// looked up first (or <c>null</c> when there is none) and handed to the
/// crawler. Exceptions thrown while the crawler runs are logged and not
/// rethrown. After the per-run blocks have completed, a
/// <c>ForceInsertMessage</c> is sent to both inserters.
/// </remarks>
/// <param name="crawler">The crawler to execute.</param>
private async Task StartCrawler(ISubmissionCrawler crawler)
{
    var oj = crawler.OnlineJudge;

    long? latestSubmissionId;
    using (var scope = _serviceProvider.CreateScope())
    {
        var context = scope.ServiceProvider.GetRequiredService<OHuntDbContext>();
        latestSubmissionId = (await context.Submission
            .Where(e => e.OnlineJudgeId == oj)
            .OrderByDescending(e => e.SubmissionId)
            .FirstOrDefaultAsync(_cancel.Token))?.SubmissionId;
    }

    _logger.LogTrace("Work on {0}, latestSubmissionId {1}", oj.ToString(), latestSubmissionId);

    // Per-run blocks; the links to the shared inserters are removed when
    // this method exits (the link handles are disposed).
    var submissionTransformer = CreateTransformer<Submission>();
    var errorTransformer = CreateTransformer<CrawlerError>();
    using var submissionUnlink = submissionTransformer.LinkTo(_submissionInserter);
    using var errorUnlink = errorTransformer.LinkTo(_errorInserter);
    var propagator = new CrawlerPropagator(submissionTransformer, errorTransformer);

    try
    {
        await crawler.WorkAsync(latestSubmissionId, propagator, _cancel.Token);

        // Final checkpoint, sent only when the crawler finished without throwing.
        await propagator.SendAsync(new CrawlerMessage
        {
            Checkpoint = true,
        });
    }
    catch (Exception e)
    {
        // Message template instead of an interpolated string, matching the
        // LogTrace call above; the rendered text is unchanged.
        _logger.LogError(e, "Exception when running crawler {0}", oj.ToString());

        // data from last checkpoint is automatically discarded
        // TODO: add entity CrawlerExecuteLog, save the execution time and
        // result of the crawler
    }
    finally
    {
        // Complete on both the success and the failure path so the
        // Completion awaits below can finish.
        propagator.Complete();
    }

    await propagator.Completion;
    await submissionTransformer.Completion;
    await errorTransformer.Completion;

    // TODO: call this after all crawler finished or after 30 minutes
    await _submissionInserter.SendAsync(DatabaseInserterMessage<Submission>.ForceInsertMessage);
    await _errorInserter.SendAsync(DatabaseInserterMessage<CrawlerError>.ForceInsertMessage);
}
/// <summary>
/// Runs <paramref name="crawler"/> concurrently with the submission and
/// error inserters, connected through two bounded buffers.
/// </summary>
/// <remarks>
/// The largest stored <c>SubmissionId</c> for the crawler's online judge is
/// looked up first (or <c>null</c> when there is none) and handed to the
/// crawler. If the crawler or an inserter throws, the inserters are
/// cancelled and the exception is logged rather than rethrown.
/// </remarks>
/// <param name="crawler">The crawler to execute.</param>
/// <param name="cancellationToken">
/// Cancels the initial database lookup and the crawler.
/// </param>
public async Task WorkAsync(
    ISubmissionCrawler crawler,
    CancellationToken cancellationToken)
{
    var oj = crawler.OnlineJudge;

    long? latestSubmissionId;
    using (var scope = _serviceProvider.CreateScope())
    {
        var context = scope.ServiceProvider.GetRequiredService<OHuntWebContext>();
        latestSubmissionId = (await context.Submission
            .Where(e => e.OnlineJudgeId == oj)
            .OrderByDescending(e => e.SubmissionId)
            .FirstOrDefaultAsync(cancellationToken: cancellationToken))?.SubmissionId;
    }

    var submissionBuffer = new BufferBlock<Submission>(new DataflowBlockOptions
    {
        BoundedCapacity = BufferCapacity,
        EnsureOrdered = false,
    });
    var errorBuffer = new BufferBlock<CrawlerError>(new DataflowBlockOptions
    {
        BoundedCapacity = BufferCapacity,
        EnsureOrdered = false,
    });

    _logger.LogTrace("Work on {0}, latestSubmissionId {1}", oj.ToString(), latestSubmissionId);

    // Not disposed here: on the failure path the inserter tasks are not
    // awaited and may still be observing this token after the method returns.
    var inserterCancel = new CancellationTokenSource();

    // cancel crawler, it may trigger crawler to submit a Complete
    // or it just throws, the catch below cancels the inserter.
    // A linked source replaces the manual Register callback so the
    // registration on the caller's token is released when this method exits.
    // The crawler task has always finished by then, because it is awaited first.
    using var crawlerCancel = CancellationTokenSource.CreateLinkedTokenSource(cancellationToken);

    var crawlerTask = crawler.WorkAsync(
        latestSubmissionId, submissionBuffer, errorBuffer, crawlerCancel.Token);
    var submissionInserterTask =
        _submissionInserter.WorkAsync(submissionBuffer, inserterCancel.Token);
    var errorInserterTask =
        _errorInserter.WorkAsync(errorBuffer, inserterCancel.Token);

    try
    {
        await crawlerTask;
        await submissionInserterTask;
        await errorInserterTask;
    }
    catch (Exception e)
    {
        inserterCancel.Cancel();
        _logger.LogError(e, "Exception when crawling");
    }
}