Beispiel #1
0
        /// <summary>
        /// Checks whether the given URL matches any of the configured blacklisted URL patterns.
        /// </summary>
        /// <param name="url">URL to test. A null URL is reported as not blacklisted.</param>
        /// <param name="config">Worker configuration providing <c>BlacklistedURLPatterns</c>; may be null.</param>
        /// <returns>
        /// True if at least one non-empty pattern matches the URL (case-insensitively); false otherwise,
        /// including when no patterns are configured.
        /// </returns>
        public static bool IsURLBlacklisted(string url, WorkerConfiguration config)
        {
            // nothing can match a missing URL; this also keeps Regex.IsMatch from throwing on null input
            if (url == null)
            {
                return(false);
            }

            // accept URL automatically if no patterns are defined to be matched
            if (config?.BlacklistedURLPatterns == null || config.BlacklistedURLPatterns.Count == 0)
            {
                return(false);
            }

            foreach (var p in config.BlacklistedURLPatterns)
            {
                // blank entries carry no pattern, so they are ignored
                if (string.IsNullOrEmpty(p))
                {
                    continue;
                }

                // the helper's result is used as the regular expression for this entry
                var pattern = GetRegexPatternForUrlMatching(p);

                // CultureInvariant keeps the case-insensitive match independent of the current culture
                if (Regex.IsMatch(url, pattern, RegexOptions.IgnoreCase | RegexOptions.CultureInvariant))
                {
                    return(true);
                }
            }

            return(false);
        }
Beispiel #2
0
        /// <summary>
        /// Gets the path of a file relative to the current working directory, optionally with the
        /// leading downloads folder removed.
        /// </summary>
        /// <param name="absolutePath">Absolute path of file</param>
        /// <param name="config">Worker configuration providing <c>DownloadsPath</c>; may be null</param>
        /// <param name="ignoreDownloadsFolder">
        /// When true (default), a leading downloads folder is stripped from the result;
        /// when false, the relative path is returned untouched
        /// </param>
        /// <returns>
        /// Relative path. An empty string is returned when the downloads folder is stripped and the
        /// path is the downloads folder itself.
        /// </returns>
        public static string GetRelativeFilePath(string absolutePath, WorkerConfiguration config, bool ignoreDownloadsFolder = true)
        {
            var relative = Path.GetRelativePath(Directory.GetCurrentDirectory(), absolutePath);

            // nothing to strip when the caller opted out or no downloads folder is configured
            var downloads = config?.DownloadsPath;
            if (!ignoreDownloadsFolder || string.IsNullOrEmpty(downloads))
            {
                return(relative);
            }

            // drop trailing separators so "downloads" and "downloads/" are treated alike
            downloads = downloads.TrimEnd(Path.DirectorySeparatorChar, Path.AltDirectorySeparatorChar);
            if (downloads.Length == 0 || !relative.StartsWith(downloads, System.StringComparison.Ordinal))
            {
                return(relative);
            }

            // the path is exactly the downloads folder: nothing remains after the prefix
            if (relative.Length == downloads.Length)
            {
                return(string.Empty);
            }

            // only strip on a folder boundary, so e.g. "downloads2/file" is left alone
            var next = relative[downloads.Length];
            if (next != Path.DirectorySeparatorChar && next != Path.AltDirectorySeparatorChar)
            {
                return(relative);
            }

            // skip the folder name and the separator that follows it
            return(relative.Substring(downloads.Length + 1));
        }
Beispiel #3
0
        // HELPER METHODS

        /// <summary>
        /// Based on domain whitelist and blacklist, decides if URL is allowed to be added to backlog
        /// </summary>
        /// <param name="url">URL whose domain name is checked</param>
        /// <param name="config">
        /// Worker configuration providing <c>DomainBlacklist</c> and <c>DomainWhitelist</c>;
        /// a null configuration or null list is treated as an empty list
        /// </param>
        /// <returns>
        /// False if no domain name could be obtained from the URL, if the domain equals a blacklisted
        /// domain, or if a non-empty whitelist has no entry contained in the domain; true otherwise.
        /// All comparisons ignore case.
        /// </returns>
        public static bool IsUrlWhitelisted(string url, WorkerConfiguration config)
        {
            // obtain the domain name of the URL; comparisons below are case-insensitive,
            // so no lower-casing is needed (and a null result cannot crash here)
            var domain = GetDomainName(url, out _);

            // reject url if domain is empty
            if (string.IsNullOrEmpty(domain))
            {
                return(false);
            }

            // check blacklist first
            var blacklist = config?.DomainBlacklist;
            if (blacklist != null)
            {
                foreach (var blocked in blacklist)
                {
                    // if domain name is equal to any blacklisted domain, reject it
                    // (string.Equals tolerates null entries)
                    if (string.Equals(domain, blocked, System.StringComparison.OrdinalIgnoreCase))
                    {
                        return(false);
                    }
                }
            }

            // check whitelist second
            var whitelist = config?.DomainWhitelist;
            if (whitelist != null && whitelist.Count > 0)
            {
                foreach (var allowed in whitelist)
                {
                    // if domain contains any of the words, automatically accept it
                    if (allowed != null && domain.IndexOf(allowed, System.StringComparison.OrdinalIgnoreCase) >= 0)
                    {
                        return(true);
                    }
                }

                // if whitelist is not empty, any non-matching domains are rejected!
                return(false);
            }

            // accept url: it is not blacklisted and no whitelist restricts it
            return(true);
        }