/// <summary>
/// Downloads a crawled url using the first registered plugin that reports support
/// for it, falling back to the default plugin when no plugin matches.
/// </summary>
/// <param name="crawledUrl">Url entry to download; must not be null.</param>
/// <param name="downloadDirectory">Root directory to download into; must not be null.</param>
/// <exception cref="ArgumentNullException">Thrown when either argument is null.</exception>
public async Task DownloadCrawledUrl(ICrawledUrl crawledUrl, string downloadDirectory)
{
    if (crawledUrl == null)
    {
        throw new ArgumentNullException(nameof(crawledUrl));
    }

    if (downloadDirectory == null)
    {
        throw new ArgumentNullException(nameof(downloadDirectory));
    }

    // Probe plugins in registration order; the first one claiming support wins.
    IPlugin selectedPlugin = null;
    if (_plugins != null)
    {
        foreach (IPlugin candidate in _plugins)
        {
            if (await candidate.IsSupportedUrl(crawledUrl.Url))
            {
                selectedPlugin = candidate;
                break;
            }
        }
    }

    // No matching plugin means the default plugin handles the url.
    await (selectedPlugin ?? _defaultPlugin).Download(crawledUrl, downloadDirectory);
}
/// <summary>
/// Downloads the google drive file referenced by the crawled url into the
/// url's download path below <paramref name="downloadDirectory"/>.
/// </summary>
/// <param name="crawledUrl">Crawled url expected to match the google drive url pattern.</param>
/// <param name="downloadDirectory">Root download directory.</param>
/// <exception cref="DownloadException">
/// Thrown when the url cannot be parsed, the target directory cannot be created,
/// or the drive engine fails to download the file.
/// </exception>
public async Task Download(ICrawledUrl crawledUrl, string downloadDirectory)
{
    _logger.Debug($"Received new url: {crawledUrl.Url}, download dir: {downloadDirectory}");

    Match match = _googleDriveRegex.Match(crawledUrl.Url);
    if (!match.Success)
    {
        _logger.Error($"Unable to parse google drive url: {crawledUrl.Url}");
        throw new DownloadException($"Unable to parse google drive url: {crawledUrl.Url}");
    }

    string id = match.Groups[1].Value;
    string downloadPath = Path.Combine(downloadDirectory, crawledUrl.DownloadPath);

    try
    {
        //warning: returns '' in drive's root
        if (!Directory.Exists(downloadPath))
        {
            Directory.CreateDirectory(downloadPath);
        }
    }
    catch (Exception ex)
    {
        throw new DownloadException($"Unable to create directory for file {crawledUrl.Url}", ex);
    }

    // Prefix the target with a 5-character slice near the end of the drive id
    // to keep files from different links distinguishable.
    string prefixedPath = Path.Combine(downloadPath, $"{id.Substring(id.Length - 6, 5)}_");
    downloadPath = prefixedPath.TrimEnd(new[] { '/', '\\' });

    _logger.Debug($"Retrieved id: {id}, download path: {downloadPath}");

    try
    {
        _engine.Download(id, downloadPath, _overwriteFiles);
    }
    catch (Exception ex)
    {
        _logger.Error("GOOGLE DRIVE ERROR: " + ex);
        throw new DownloadException($"Unable to download {crawledUrl.Url}", ex);
    }
}
/// <summary>
/// Downloads the supplied mega.nz url. When the mega downloader could not be
/// initialized (e.g. bad credentials) the url is logged and skipped instead of
/// failing the whole batch.
/// </summary>
/// <param name="crawledUrl">Crawled mega.nz url to download.</param>
/// <param name="downloadDirectory">Root download directory.</param>
/// <exception cref="Common.Exceptions.DownloadException">Thrown when the download fails.</exception>
public async Task Download(ICrawledUrl crawledUrl, string downloadDirectory)
{
    if (_megaDownloader == null)
    {
        // Deliberate best-effort: a misconfigured downloader skips this url rather
        // than aborting every other download in the run.
        _logger.Fatal($"Mega downloader initialization failure (check credentials), {crawledUrl.Url} will not be downloaded!");
        return;
    }

    try
    {
        await _megaDownloader.DownloadUrlAsync(crawledUrl, downloadDirectory);
    }
    catch (DownloadException)
    {
        // Already the exception type callers expect; rethrow with `throw;`
        // to preserve the original stack trace. (Fix: the caught exception
        // variable was declared but never used.)
        throw;
    }
    catch (Exception ex)
    {
        throw new Common.Exceptions.DownloadException($"Unable to download {crawledUrl.Url}: {ex}", ex);
    }
}
/// <summary>
/// Downloads a mega.nz url — either a whole folder or a single file — into
/// <paramref name="downloadPath"/> combined with the crawled url's relative path.
/// </summary>
/// <param name="crawledUrl">Crawled url pointing at a mega.nz file or folder.</param>
/// <param name="downloadPath">Root download directory.</param>
/// <param name="overwriteFiles">Whether existing files should be overwritten.</param>
public async Task DownloadUrlAsync(ICrawledUrl crawledUrl, string downloadPath, bool overwriteFiles = false)
{
    // Fix: log message typo "Staring" -> "Starting".
    _logger.Debug($"[MEGA] Starting downloading {crawledUrl.Url}");

    Uri uri = new Uri(crawledUrl.Url);
    if (await IsUrlAFolder(uri))
    {
        await DownloadFolderAsync(uri, Path.Combine(downloadPath, crawledUrl.DownloadPath), overwriteFiles);
    }
    else
    {
        // Prefix the file name with the first 5 characters of the mega id so
        // same-named files from different links do not collide on disk.
        (_, string id, _) = MegaUrlDataExtractor.Extract(crawledUrl.Url);
        INodeInfo fileNodeInfo = await _client.GetNodeFromLinkAsync(uri);
        string path = Path.Combine(downloadPath, crawledUrl.DownloadPath, $"{id.Substring(0, 5)}_{fileNodeInfo.Name}");
        await DownloadFileAsync(null, uri, fileNodeInfo, path, overwriteFiles);
    }

    _logger.Debug($"[MEGA] Finished downloading {crawledUrl.Url}");
}
/// <summary>
/// Downloads every crawled url in the list, running up to four downloads concurrently.
/// Invalid and blacklisted urls are logged and skipped; individual download failures
/// are reported through <c>OnFileDownloaded</c> instead of aborting the whole batch.
/// </summary>
/// <param name="crawledUrls">Urls to download; must not be null.</param>
/// <param name="downloadDirectory">Root directory to download into; must not be null or empty.</param>
/// <param name="cancellationToken">Cancels the run between downloads already in flight.</param>
/// <exception cref="ArgumentNullException">Thrown when <paramref name="crawledUrls"/> is null.</exception>
/// <exception cref="ArgumentException">Thrown when <paramref name="downloadDirectory"/> is null or empty.</exception>
public async Task Download(List<ICrawledUrl> crawledUrls, string downloadDirectory, CancellationToken cancellationToken)
{
    if (crawledUrls == null)
    {
        throw new ArgumentNullException(nameof(crawledUrls));
    }

    if (string.IsNullOrEmpty(downloadDirectory))
    {
        throw new ArgumentException("Argument cannot be null or empty", nameof(downloadDirectory));
    }

    using (SemaphoreSlim concurrencySemaphore = new SemaphoreSlim(4)) //todo: allow setting the count here (issue #4)
    {
        List<Task> tasks = new List<Task>();
        for (int i = 0; i < crawledUrls.Count; i++)
        {
            // Fix: check cancellation BEFORE acquiring a slot, so a cancelled run
            // does not throw with the semaphore held (leaking the slot).
            cancellationToken.ThrowIfCancellationRequested();

            // Fix: await WaitAsync instead of blocking the calling thread with
            // Wait() inside an async method, and honor cancellation while waiting.
            await concurrencySemaphore.WaitAsync(cancellationToken);

            int entryPos = i; // stable copy of the loop variable for the closure
            Task task = Task.Run(async () =>
            {
                try
                {
                    ICrawledUrl entry = crawledUrls[entryPos];
                    if (!_urlChecker.IsValidUrl(entry.Url))
                    {
                        _logger.Error($"Invalid url: {entry.Url}");
                        return;
                    }

                    if (_urlChecker.IsBlacklistedUrl(entry.Url))
                    {
                        _logger.Warn($"Url is blacklisted: {entry.Url}");
                        return;
                    }

                    _logger.Debug($"Downloading {entryPos + 1}/{crawledUrls.Count}: {entry.Url}");
                    try
                    {
                        _logger.Debug($"Calling url processor for: {entry.Url}");
                        bool isDownloadAllowed = await _crawledUrlProcessor.ProcessCrawledUrl(entry, downloadDirectory);
                        if (isDownloadAllowed)
                        {
                            if (string.IsNullOrWhiteSpace(entry.DownloadPath))
                            {
                                throw new DownloadException($"Download path is not filled for {entry.Url}");
                            }

                            await _pluginManager.DownloadCrawledUrl(entry, downloadDirectory);
                        }
                        else
                        {
                            _logger.Debug($"ProcessCrawledUrl returned false, {entry.Url} will be skipped");
                        }

                        //TODO: mark isDownloadAllowed = false entries as skipped
                        entry.IsDownloaded = true;
                        OnFileDownloaded(new FileDownloadedEventArgs(entry.Url, crawledUrls.Count));
                    }
                    catch (DownloadException ex)
                    {
                        // Download failures are reported per-url, not rethrown,
                        // so the remaining downloads continue.
                        string logMessage = $"Error while downloading {entry.Url}: {ex.Message}";
                        if (ex.InnerException != null)
                        {
                            // Fix: the original appended this via a string literal broken
                            // across physical lines, which does not compile.
                            logMessage += $". \nInner Exception: {ex.InnerException}";
                        }

                        _logger.Error(logMessage);
                        OnFileDownloaded(new FileDownloadedEventArgs(entry.Url, crawledUrls.Count, false, logMessage));
                    }
                    catch (Exception ex)
                    {
                        throw new UniversalDownloaderPlatformException(
                            $"Error while downloading {entry.Url}: {ex.Message}", ex);
                    }
                }
                finally
                {
                    concurrencySemaphore.Release();
                }
            }, cancellationToken);
            tasks.Add(task);
        }

        await Task.WhenAll(tasks);
        _logger.Debug("Finished all tasks");
    }
}
/// <summary>
/// Creates event args describing a newly discovered crawled url.
/// </summary>
/// <param name="crawledUrl">The crawled url; must not be null.</param>
/// <exception cref="ArgumentNullException">Thrown when <paramref name="crawledUrl"/> is null.</exception>
public NewCrawledUrlEventArgs(ICrawledUrl crawledUrl)
{
    if (crawledUrl == null)
    {
        throw new ArgumentNullException(nameof(crawledUrl));
    }

    _crawledUrl = crawledUrl;
}