/// <summary>
/// Records the outcome of processing a page in <c>_context.PagesProcessingStatuses</c>
/// and persists the change with <c>SaveChangesAsync</c>.
/// </summary>
/// <param name="pageNumber">Page number; used both to look up and to populate the row's <c>Id</c>.</param>
/// <param name="state">Outcome to record; stored as its integer value in <c>Status</c>.</param>
/// <param name="errorMessage">Text written to the row's <c>LastError</c>.</param>
/// <exception cref="ArgumentException">
/// Thrown when <paramref name="state"/> is <c>SuccessfullProcessed</c> but a row for the page already exists,
/// or when it is <c>SuccessfullReprocessed</c> but no row for the page exists.
/// </exception>
/// <remarks>
/// Any state other than the three handled here leaves the data untouched;
/// <c>SaveChangesAsync</c> is still called in that case.
/// </remarks>
private async Task SetPageProcessingStateAsync(int pageNumber, ProcessingState state, string errorMessage)
{
    var existingStatus = await _context.PagesProcessingStatuses.FirstOrDefaultAsync(p => p.Id == pageNumber);

    // Reject the two combinations that contradict what is already stored.
    if (existingStatus != null && state == ProcessingState.SuccessfullProcessed)
    {
        throw new ArgumentException($"Atempt to save the page {pageNumber} which was already saved before");
    }

    if (existingStatus == null && state == ProcessingState.SuccessfullReprocessed)
    {
        throw new ArgumentException($"Attempt to set success reprocessed state failed. There is no the page {pageNumber} in the unprocessed list");
    }

    bool isFailure = state == ProcessingState.Failed;

    if (existingStatus != null)
    {
        // Existing row: a retry either succeeded or failed again, so count one more attempt.
        if (isFailure || state == ProcessingState.SuccessfullReprocessed)
        {
            existingStatus.Status = (int)state;
            existingStatus.AttemptToProcess++;
            existingStatus.LastError = errorMessage;
        }
    }
    else if (isFailure || state == ProcessingState.SuccessfullProcessed)
    {
        // No row yet: this is the first recorded attempt for the page.
        _context.PagesProcessingStatuses.Add(new PageProcessingStatus
        {
            AttemptToProcess = 1,
            Id = pageNumber,
            Status = (int)state,
            LastError = errorMessage
        });
    }

    await _context.SaveChangesAsync();
}
/// <summary>
/// Performs one step of the scraping state machine driven by <c>_currentState</c>.
/// </summary>
/// <remarks>
/// Order of evaluation within a single call:
/// <list type="number">
/// <item>While <c>_delayCounter</c> is positive, only decrement it and refresh the countdown label.</item>
/// <item>In <c>States.Started</c>, navigate to the first search URL and switch to <c>States.CollectingLinks</c>.</item>
/// <item>If the page is not loaded, arm the browser-wait delay and switch to <c>States.WatingForBrowser</c>,
/// remembering the state to resume.</item>
/// <item>Otherwise resume the remembered state (if waiting) and run the link-collection or
/// data-collection phase, alternating between a "navigate" sub-step and a "process" sub-step
/// tracked by <c>_pageStatus</c>.</item>
/// </list>
/// NOTE(review): presumably invoked once per timer tick — confirm against the caller.
/// </remarks>
private void ScrampingStep()
{
    // Delay still running: consume one unit and do nothing else this step.
    if (_delayCounter > 0)
    {
        _delayCounter--;
        TimeToNextVacancyLblUpdateContent();
        return;
    }

    // Started: open the first search URL. Handled before the page-loaded check.
    if (_currentState == States.Started)
    {
        // Progress is reported against the search-URL list, so use the URL index
        // (the same counter every other link-search message uses).
        LogRtbAppendText("Navigating site: " + (_currentUrlIndexForLinkSearch + 1) + " of " + _urlsForVacancyLinkSearch.Count + "\r\n");
        Navigate(_urlsForVacancyLinkSearch[_currentUrlIndexForLinkSearch++]);
        _pageStatus = PageProcessingStatus.Collecting;
        SetCurrentState(States.CollectingLinks);
        return;
    }

    // Page not loaded yet: wait, remembering which state to come back to.
    if (!_pageLouded)
    {
        _delayCounter = VaitingForBrowserDelay;
        TimeToNextVacancyLblUpdateContent();

        // Do not overwrite the remembered state while we are already waiting.
        if (_currentState != States.WatingForBrowser)
        {
            _previousState = _currentState;
        }

        SetCurrentState(States.WatingForBrowser);
        return;
    }

    // Page is loaded: leave the waiting state and continue below within the same step.
    if (_currentState == States.WatingForBrowser)
    {
        SetCurrentState(_previousState);
    }

    // CollectingLinks: walk the search URLs and collect vacancy links from each.
    if (_currentState == States.CollectingLinks)
    {
        // _currentUrlIndexForLinkSearch is 1-based here: it was post-incremented when the
        // first URL was opened, hence "<=" and the "- 1" when indexing.
        if (_currentUrlIndexForLinkSearch <= _urlsForVacancyLinkSearch.Count)
        {
            if (_pageStatus == PageProcessingStatus.Navigating)
            {
                LogRtbAppendText("Navigating site: " + (_currentUrlIndexForLinkSearch) + " of " + _urlsForVacancyLinkSearch.Count + "\r\n");
                Navigate(_urlsForVacancyLinkSearch[_currentUrlIndexForLinkSearch - 1]);
                _pageStatus = PageProcessingStatus.Collecting;
            }
            else
            {
                LogRtbAppendText("Processing site: " + (_currentUrlIndexForLinkSearch) + " of " + _urlsForVacancyLinkSearch.Count + "\r\n");
                GetVacancyes();
                _currentUrlIndexForLinkSearch++;
                _delayCounter = GenerateDelay(_delayPodium, _delayDeviation);
                TimeToNextVacancyLblUpdateContent();
                _pageStatus = PageProcessingStatus.Navigating;
            }

            return;
        }

        // All search URLs have been processed.
        if (_vacancy.Count == 0)
        {
            StopJob("There is no vacancyes.");

            // Nothing to visit: stop here instead of falling through, which would switch to
            // CollectingData and index into the empty _vacancy collection.
            return;
        }

        PrintCollectedVacancyesReport();
        SetCurrentState(States.CollectingData);
        LogRtbAppendText("Navigating site: " + (_currentVacancyIndex + 1) + " of " + _vacancy.Count + "\r\n");
        Navigate(_vacancy[_currentVacancyIndex].VacancyPageUrl);
        _pageStatus = PageProcessingStatus.Collecting;
        return;
    }

    // CollectingData: visit each collected vacancy page and persist its data.
    if (_currentState == States.CollectingData)
    {
        if (_currentVacancyIndex < _vacancy.Count)
        {
            if (_pageStatus == PageProcessingStatus.Navigating)
            {
                LogRtbAppendText("Navigating site: " + (_currentVacancyIndex + 1) + " of " + _vacancy.Count + "\r\n");
                Navigate(_vacancy[_currentVacancyIndex].VacancyPageUrl);
                _pageStatus = PageProcessingStatus.Collecting;
            }
            else
            {
                LogRtbAppendText("Processing site: " + (_currentVacancyIndex + 1) + " of " + _vacancy.Count + "\r\n");
                GetVacancyData();
                WriteVacancyHtmlToFile();
                SaveVacancyToDb();
                _currentVacancyIndex++;
                _delayCounter = GenerateDelay(_delayPodium, _delayDeviation);
                TimeToNextVacancyLblUpdateContent();
                _pageStatus = PageProcessingStatus.Navigating;
            }

            return;
        }

        // Every vacancy has been processed.
        PrintCollectedDataReport();
        StopJob("Job done!");
    }
}