/// <summary>
/// Builds a scraper for the diary described by <paramref name="options"/>:
/// creates the shared cookie container and web client, the existing-download
/// checker, the resource downloader (with its progress hooks), the HTML parser
/// and the "more" links fixer.
/// </summary>
/// <param name="logger">Logger stored on the scraper and handed to the checker and the downloader.</param>
/// <param name="context">Scrape context stored on the scraper and handed to the existing-download checker.</param>
/// <param name="options">Scrape options; <c>WorkingDir</c> and <c>DiaryName</c> are read here.</param>
public DiaryScraperNew(ILogger<DiaryScraperNew> logger, ScrapeContext context, DiaryScraperOptions options)
{
    _logger = logger;
    _cookieContainer = new CookieContainer();
    _webClient = new CF_WebClient(_cookieContainer);
    _context = context;
    _options = options;

    // The checker and the downloader both work against <WorkingDir>/<DiaryName>.
    var diaryDirectory = Path.Combine(_options.WorkingDir, _options.DiaryName);
    _downloadExistingChecker = new DownloadExistingChecker(diaryDirectory, context, _logger);

    var diaryBaseUrl = $"http://{_options.DiaryName}.diary.ru";
    _downloader = new DataDownloader(diaryBaseUrl, diaryDirectory, _cookieContainer, _logger);

    _downloader.BeforeDownload += (sender, args) =>
    {
        // Image downloads do not update the "current URL" progress entry.
        if (args.Resource is DiaryImage)
        {
            return;
        }

        Progress.Values[ScrapeProgressNames.CurrentUrl] = args.Resource.Url.ToLower();
    };
    _downloader.AfterDownload += OnResourceDownloaded;

    // The HTML parser is created from a configuration with CSS support enabled.
    _parser = new HtmlParser(new Configuration().WithCss());
    _moreFixer = new DiaryMoreLinksFixer(_downloader, _options.WorkingDir, _options.DiaryName);
}
/// <summary>
/// Creates and configures a <see cref="DiaryScraperNew"/> for the given task descriptor.
/// </summary>
/// <remarks>
/// Validates the date interval, derives the diary name from the descriptor's URL,
/// ensures the working directories, configures logging for the working directory
/// and builds the scraper. The logging configuration is undone when the scraper
/// raises <c>WorkFinished</c>, or immediately if creation fails after logging was
/// configured (otherwise nothing would ever undo it, since no scraper is returned).
/// </remarks>
/// <param name="descriptor">Task description; on success its <c>Scraper</c> is set, on failure its error is set.</param>
/// <param name="login">Login copied into the scraper options.</param>
/// <param name="password">Password copied into the scraper options.</param>
/// <returns>The created scraper, or <c>null</c> if any step failed (the error is logged and recorded on the descriptor).</returns>
public DiaryScraperNew GetScraper(ScrapeTaskDescriptor descriptor, string login, string password)
{
    // Assigned once logging has been configured, so the failure path can undo it.
    Action releaseLog = null;

    try
    {
        if (descriptor.ScrapeStart > descriptor.ScrapeEnd)
        {
            // Message text: "Invalid date interval".
            throw new ArgumentException("Неверный интервал дат");
        }

        var diaryName = GetDiaryName(descriptor.DiaryUrl);
        EnsureDirs(descriptor.WorkingDir, diaryName);

        var cfg = ConfigureLog(descriptor.WorkingDir);
        releaseLog = () => UnsetLog(cfg);

        var logger = _serviceProvider.GetRequiredService<ILogger<DiaryScraperNew>>();
        var context = GetContext(descriptor.WorkingDir, diaryName);
        var options = new DiaryScraperOptions
        {
            WorkingDir = descriptor.WorkingDir,
            DiaryName = diaryName.ToLower(),
            Login = login,
            Password = password,
            RequestDelay = descriptor.RequestDelay,
            ScrapeStart = descriptor.ScrapeStart,
            ScrapeEnd = descriptor.ScrapeEnd,
            Overwrite = descriptor.Overwrite,
            DownloadEdits = descriptor.DownloadEdits,
            DownloadAccount = descriptor.DownloadAccount
        };

        var scraper = new DiaryScraperNew(logger, context, options);
        scraper.WorkFinished += (s, e) => { UnsetLog(cfg); };
        descriptor.Scraper = scraper;
        return scraper;
    }
    catch (Exception e)
    {
        _logger.LogError(e, "Error");
        descriptor.SetError(e.Message);

        // No scraper is handed out on failure, so WorkFinished will never fire;
        // undo the log configuration here, after the error has been logged.
        if (releaseLog != null)
        {
            releaseLog();
        }

        return null;
    }
}