/// <summary>
/// Builds the scraper: stores the injected services, creates a cookie container shared by the
/// web client and the downloader, hooks the downloader events and prepares the HTML parser
/// and the "more" links fixer.
/// </summary>
/// <param name="logger">Logger stored by the scraper and handed to the helper objects created here.</param>
/// <param name="context">Scrape context; also passed to the existing-download checker.</param>
/// <param name="options">Options supplying the working directory and the diary name.</param>
public DiaryScraperNew(ILogger<DiaryScraperNew> logger, ScrapeContext context, DiaryScraperOptions options)
{
    _logger = logger;
    _context = context;
    _options = options;

    // A single cookie container is shared by the scraper's own client and the downloader.
    _cookieContainer = new CookieContainer();
    _webClient = new CF_WebClient(_cookieContainer);

    // Both the checker and the downloader work on <WorkingDir>/<DiaryName>.
    var workingDir = _options.WorkingDir;
    var diaryName = _options.DiaryName;
    var diaryDir = Path.Combine(workingDir, diaryName);

    _downloadExistingChecker = new DownloadExistingChecker(diaryDir, context, _logger);
    _downloader = new DataDownloader($"http://{diaryName}.diary.ru", diaryDir, _cookieContainer, _logger);

    // Publish the URL being fetched as progress, except for image resources.
    _downloader.BeforeDownload += (sender, args) =>
    {
        if (args.Resource is DiaryImage)
        {
            return;
        }

        Progress.Values[ScrapeProgressNames.CurrentUrl] = args.Resource.Url.ToLower();
    };
    _downloader.AfterDownload += OnResourceDownloaded;

    _parser = new HtmlParser(new Configuration().WithCss());
    _moreFixer = new DiaryMoreLinksFixer(_downloader, workingDir, diaryName);
}
/// <summary>
/// Downloads a single resource, raises the <c>BeforeDownload</c>/<c>AfterDownload</c> events around
/// the request and, when the resource has a relative path, writes the bytes to that path under
/// the diary directory.
/// </summary>
/// <param name="downloadResource">
/// Resource to fetch. Its <c>Url</c> must be non-empty; a URL that does not start with "htt"
/// is resolved against <c>BaseUrl</c>.
/// </param>
/// <param name="ignore404">
/// When <c>true</c>, an HTTP 404 response is logged as a warning and produces a result with
/// <c>DownloadedData == null</c> (and an empty <c>LocalPath</c>) instead of being retried.
/// </param>
/// <param name="requestDelay">Delay in milliseconds awaited before the request is sent.</param>
/// <returns>The resource together with the downloaded bytes (or <c>null</c> for an ignored 404).</returns>
/// <exception cref="ArgumentException">The resource is <c>null</c> or has no URL.</exception>
/// <exception cref="WebException">
/// The request kept failing until <c>Constants.DownloadRetryCount</c> attempts were used up.
/// </exception>
public async Task<DataDownloaderResult> Download(DownloadResource downloadResource, bool ignore404 = true, int requestDelay = 0)
{
    if (downloadResource == null || string.IsNullOrEmpty(downloadResource.Url))
    {
        throw new ArgumentException("Для скачивания должны быть заполнены пути к данным");
    }

    _logger.LogInformation("Downloading data: " + downloadResource.Url);

    // Protocol prefix check: ordinal comparison, not culture-sensitive.
    var uri = downloadResource.Url.StartsWith("htt", StringComparison.Ordinal)
        ? new Uri(downloadResource.Url)
        : new Uri(new Uri(BaseUrl), downloadResource.Url);

    // An empty path means "do not save to disk".
    var filePath = string.IsNullOrEmpty(downloadResource.RelativePath)
        ? string.Empty
        : Path.Combine(_diaryPath, downloadResource.RelativePath);

    // NOTE(review): the client is created per call and never disposed; if CF_WebClient is
    // IDisposable (not visible from here), wrap it in a using block - confirm.
    var client = new CF_WebClient(_cookieContainer);

    BeforeDownload?.Invoke(this, new DataDownloaderEventArgs { Resource = downloadResource });

    // Asynchronous wait: Thread.Sleep would block the calling thread inside an async method.
    await Task.Delay(requestDelay);

    const int retryPauseMilliseconds = 2000;
    byte[] downloadedData;
    var retries = 0;
    while (true)
    {
        try
        {
            downloadedData = await client.DownloadDataTaskAsync(uri);
            break;
        }
        catch (WebException e)
        {
            if (e.Status == WebExceptionStatus.ProtocolError && ignore404)
            {
                var response = e.Response as HttpWebResponse;
                if (response != null && response.StatusCode == HttpStatusCode.NotFound)
                {
                    // A missing resource is not retried: report it and return an empty result.
                    _logger.LogWarning("Url not found: " + response.ResponseUri.AbsoluteUri);
                    downloadResource.LocalPath = "";
                    return new DataDownloaderResult { Resource = downloadResource, DownloadedData = null };
                }
            }

            retries += 1;
            _logger.LogError(e, $"Error, retry count: {retries}");
            if (retries >= Constants.DownloadRetryCount)
            {
                throw;
            }

            // Pause between attempts without blocking the thread.
            await Task.Delay(retryPauseMilliseconds);
        }
    }

    // Subscribers see the data before it is written to disk.
    AfterDownload?.Invoke(this, new DataDownloaderEventArgs { Resource = downloadResource, DownloadedData = downloadedData });

    if (!string.IsNullOrEmpty(filePath))
    {
        using (var f = File.Create(filePath))
        {
            await f.WriteAsync(downloadedData, 0, downloadedData.Length);
        }
    }

    return new DataDownloaderResult { Resource = downloadResource, DownloadedData = downloadedData };
}