/// <summary>
/// Background-job entry point: resolves the crawl grain for the request and,
/// when that grain reports a pending job, starts processing and waits for it to finish.
/// </summary>
/// <param name="request">The crawl request; its <c>Id</c> selects the grain.</param>
/// <returns>A task that completes when processing has finished, or immediately after the status check when the job is not pending.</returns>
public async Task Perform(CrawlRequestBase request)
{
    var grain = _grainFactory.GetGrain<IProcessCrawlRequests>(request.Id);

    // Anything other than a pending job is left untouched.
    if (await grain.GetStatus() != CrawlJobStatus.Pending)
    {
        return;
    }

    // Process yields a task whose result is itself a task, so both are awaited in turn.
    var pendingCompletion = await grain.Process(request);
    await pendingCompletion;
}
/// <summary>
/// Enqueues a background job for the given crawl request unless a job is already
/// pending or running and the request does not force a new one.
/// </summary>
/// <param name="request">The crawl request; <c>ForceNew</c> decides whether an existing pending/running job is replaced.</param>
/// <returns>
/// The current status when a job is already pending and <c>ForceNew</c> is not set;
/// <see cref="CrawlJobStatus.Conflict"/> when a job is running and <c>ForceNew</c> is not set;
/// <see cref="CrawlJobStatus.Running"/> when the state turns out to be running once the mutex is held;
/// otherwise <see cref="CrawlJobStatus.Pending"/> after the job has been enqueued.
/// </returns>
public async Task<CrawlJobStatus> Enqueue(CrawlRequestBase request)
{
    if (State.Data.Status == CrawlJobStatus.Pending)
    {
        if (!request.ForceNew)
        {
            return State.Data.Status;
        }

        Reset();
    }
    else if (State.Data.Status == CrawlJobStatus.Running)
    {
        if (!request.ForceNew)
        {
            return CrawlJobStatus.Conflict;
        }

        await StopProcessing(); // cancel the current job and save its progress
        Reset();
    }

    // Acquire the mutex BEFORE entering the try block: if the wait itself throws,
    // the finally clause must not release a mutex that was never obtained.
    await _enqueueMutex.WaitAsync();
    try
    {
        // mutex obtained, last check to see if we're already processing
        if (State.Data.Status == CrawlJobStatus.Running)
        {
            return State.Data.Status;
        }

        // Start from a fresh snapshot unless the state is still untouched.
        if (State.Data.Status != CrawlJobStatus.None)
        {
            State.Data = new CrawlStateSnapshot();
        }

        _cancellationTokenSource = new CancellationTokenSource();
        _jobId = _jobClient.Enqueue<CrawlJobPerformer>(cjp => cjp.Perform(request));
        State.Data.Status = CrawlJobStatus.Pending;

        return State.Data.Status;
    }
    finally
    {
        _enqueueMutex.Release();
    }
}
/// <summary>
/// Template method for all derived strategies to follow: stores the cancellation token,
/// initializes state and the dataflow, starts the stopwatch and sends the request's URI
/// to the URI propagator block.
/// </summary>
/// <param name="request">The crawl request whose <c>Uri</c> is sent to the propagator block.</param>
/// <param name="cancellationToken">Token kept on the instance and passed to state initialization and to the send.</param>
/// <returns>A task that completes once the send of the request URI has completed.</returns>
public async Task ProcessRequestAsync(CrawlRequestBase request, CancellationToken cancellationToken)
{
    _cancellationToken = cancellationToken;

    // State is initialized before the dataflow is set up.
    await InitializeState(cancellationToken);
    InitializeDataflowInternal();

    // The stopwatch is started before the first URI is sent.
    _stopwatch.Start();
    await _uriPropagatorBlock.SendAsync(request.Uri, cancellationToken);
}
/// <summary>
/// Creates the crawler strategy that matches the runtime type of the request.
/// </summary>
/// <param name="request">The crawl request to create a strategy for.</param>
/// <returns>A <see cref="HyperlinkCrawlerStrategy"/> when the request is a <see cref="HyperlinkCrawlRequest"/>.</returns>
/// <exception cref="NotImplementedException">Thrown for every other request type.</exception>
public ICrawlerStrategy Create(CrawlRequestBase request)
{
    if (request is HyperlinkCrawlRequest hyperlinkRequest)
    {
        var settings = _configuration.GetCrawlerSettings<HyperlinkCrawlerSettings>();
        var client = GetOrAddClient(CrawlRequestType.Hyperlink);

        return new HyperlinkCrawlerStrategy(settings, client, _logger, hyperlinkRequest);
    }

    throw new NotImplementedException();
}
/// <summary>
/// Starts processing the crawl request with a strategy obtained from the strategy factory
/// and schedules periodic state persistence while the strategy runs.
/// This method is invoked indeterministically from the Hangfire job server.
/// </summary>
/// <param name="request">
/// The crawl request to process. Unless its <c>CompletionWindow</c> is
/// <see cref="Timeout.InfiniteTimeSpan"/>, cancellation is scheduled for that window plus 200 ms.
/// </param>
/// <returns>
/// An outer task that completes once the strategy has been started and the persist timer is
/// registered; its result is an inner task that completes after the strategy's completion
/// and a final state persist.
/// </returns>
public virtual async Task<Task> Process(CrawlRequestBase request)
{
    // should never be null if a job has been enqueued, except for unit tests
    if (_cancellationTokenSource == null)
    {
        _cancellationTokenSource = new CancellationTokenSource();
    }

    _strategy = _crawlerStrategyFactory.Value.Create(request);

    if (request.CompletionWindow != Timeout.InfiniteTimeSpan)
    {
        // Cancel slightly after the completion window has elapsed.
        _cancellationTokenSource.CancelAfter(
            request.CompletionWindow.Add(TimeSpan.FromMilliseconds(200)));
    }

    await _strategy.ProcessRequestAsync(request, _cancellationTokenSource.Token);

    // Persist state periodically while the strategy runs. The timer handle is kept so the
    // timer can be stopped on completion instead of ticking on after the job has finished.
    var persistTimer = RegisterTimer(
        PersistStateCallback,
        null,
        TimeSpan.Zero,
        _strategy.PersistStateInterval);

    // Completion does not necessarily mean success (it may also mean cancelled); in every
    // case stop the periodic persistence and write one final snapshot.
    return _strategy.Completion
        .ContinueWith(
            _ =>
            {
                // Null-tolerant in case RegisterTimer is stubbed out (e.g. in unit tests).
                persistTimer?.Dispose();
                return PersistState(_strategy.GetSnapshot());
            },
            TaskScheduler.Current)
        .Unwrap();
}