/// <summary>
/// Tells whether the given URL matches at least one of the configured blacklisted URL patterns.
/// Each non-empty pattern is converted with GetRegexPatternForUrlMatching and matched
/// against the URL as a case-insensitive regular expression.
/// </summary>
/// <param name="url">URL to test</param>
/// <param name="config">Worker configuration holding BlacklistedURLPatterns; may be null</param>
/// <returns>True when some pattern matches; false when nothing matches or no patterns are defined</returns>
public static bool IsURLBlacklisted(string url, WorkerConfiguration config)
{
    var patterns = config?.BlacklistedURLPatterns;

    // with no patterns to match against, no URL can be blacklisted
    if (patterns == null || patterns.Count == 0)
    {
        return false;
    }

    foreach (var candidate in patterns)
    {
        // null/empty entries are skipped; anything else is tried as a regex
        if (!string.IsNullOrEmpty(candidate)
            && Regex.IsMatch(url, GetRegexPatternForUrlMatching(candidate), RegexOptions.IgnoreCase))
        {
            return true;
        }
    }

    return false;
}
/// <summary>
/// Gets the path of a file relative to the current working directory, optionally
/// removing the leading downloads folder from the result.
/// </summary>
/// <param name="absolutePath">Absolute path of file</param>
/// <param name="config">Worker configuration; its DownloadsPath is the folder prefix that gets removed</param>
/// <param name="ignoreDownloadsFolder">When true (default), a leading downloads folder is removed from the relative path</param>
/// <returns>
/// Relative path. An empty string is returned when the path is the downloads folder itself
/// and <paramref name="ignoreDownloadsFolder"/> is true.
/// </returns>
public static string GetRelativeFilePath(string absolutePath, WorkerConfiguration config, bool ignoreDownloadsFolder = true)
{
    var relative = Path.GetRelativePath(Directory.GetCurrentDirectory(), absolutePath);

    if (!ignoreDownloadsFolder)
    {
        return relative;
    }

    // Trailing separators are dropped so "downloads" and "downloads/" behave the same.
    var downloads = config?.DownloadsPath?.TrimEnd(Path.DirectorySeparatorChar, Path.AltDirectorySeparatorChar);

    // Nothing to strip when no downloads folder is configured or the path is not under it.
    if (string.IsNullOrEmpty(downloads) || !relative.StartsWith(downloads, StringComparison.Ordinal))
    {
        return relative;
    }

    // The path is exactly the downloads folder: nothing remains after removing it.
    if (relative.Length == downloads.Length)
    {
        return string.Empty;
    }

    // Only strip on a whole path segment, so "downloads2/file" is left untouched.
    var next = relative[downloads.Length];
    if (next != Path.DirectorySeparatorChar && next != Path.AltDirectorySeparatorChar)
    {
        return relative;
    }

    // Skip the folder name and the separator that follows it.
    return relative.Substring(downloads.Length + 1);
}
// HELPER METHODS

/// <summary>
/// Based on domain whitelist and blacklist, decides if URL is allowed to be added to backlog.
/// The blacklist is checked first (exact, case-insensitive domain match); the whitelist is
/// checked second (case-insensitive substring match). Null or empty list entries are ignored.
/// </summary>
/// <param name="url">URL whose domain name (as returned by GetDomainName) is checked</param>
/// <param name="config">Worker configuration providing DomainBlacklist and DomainWhitelist</param>
/// <returns>
/// False when no domain can be determined, when the domain is blacklisted, or when a whitelist
/// with usable entries exists and none of them occurs in the domain; true otherwise.
/// </returns>
public static bool IsUrlWhitelisted(string url, WorkerConfiguration config)
{
    var domain = GetDomainName(url, out _);

    // reject url if domain is missing (checked before any use, so a null domain cannot throw)
    if (string.IsNullOrEmpty(domain))
    {
        return false;
    }

    // check blacklist first
    var blacklist = config?.DomainBlacklist;
    if (blacklist != null)
    {
        foreach (var blocked in blacklist)
        {
            // if domain name is equal to any blacklisted domain, reject it
            if (string.Equals(domain, blocked, StringComparison.OrdinalIgnoreCase))
            {
                return false;
            }
        }
    }

    // check whitelist second
    var whitelistInEffect = false;
    var whitelist = config?.DomainWhitelist;
    if (whitelist != null)
    {
        foreach (var allowed in whitelist)
        {
            // an empty word would be contained in every domain and defeat the whitelist
            if (string.IsNullOrEmpty(allowed))
            {
                continue;
            }

            whitelistInEffect = true;

            // if domain contains any of the words, automatically accept it
            if (domain.IndexOf(allowed, StringComparison.OrdinalIgnoreCase) >= 0)
            {
                return true;
            }
        }
    }

    // if the whitelist has usable entries, any non-matching domain is rejected;
    // otherwise the url is accepted because it passed the blacklist
    return !whitelistInEffect;
}