public void TestStuff2()
        {
            // Input embeds a URL-like token that uses a custom "attachment" scheme.
            const string text = " some new text attachment://hereclickonthis.exe now see? ";

            var detector = new UrlDetector(text, UrlDetectorOptions.Default);
            var detected = detector.Detect();

            // Smoke test only: materialize the full URL of every detection so that
            // both detection and URL formatting are exercised. Nothing is asserted
            // about the results.
            _ = detected.Select(found => found.GetFullUrl()).ToList();
        }
Esempio n. 2
0
        public void TestMailTo()
        {
            // Two mailto-style candidates: one without and one with the "//" separator.
            const string text = " mailTo:dale:[email protected] and then mailto://[email protected]";
            var options = UrlDetectorOptions.Default | UrlDetectorOptions.HTML;

            var detected = new UrlDetector(text, options).Detect();

            // Exercise the scheme and host accessors on every detection
            // (two separate passes: all schemes first, then all hosts).
            foreach (var url in detected)
            {
                url.GetScheme();
            }

            foreach (var url in detected)
            {
                url.GetHost();
            }

            Assert.Equal(2, detected.Count);
        }
Esempio n. 3
0
 public void TestUriSchemeLocators()
 {
     // For every scheme name in the lookup, build a text containing two URLs that
     // use it (one with a literal colon, one with the colon written as "%3a") and
     // expect the detector, restricted to that single scheme, to report two URLs.
     foreach (var scheme in UriSchemeLookup.UriSchemeNames)
     {
         var plainUrl   = $"{scheme}://mytestsite.com";
         var encodedUrl = $"{scheme}%3a//othersite.org";
         var text       = $"did we @>> << !!://JK find #4jadsfj the url: {plainUrl} and this one too {encodedUrl} ?";

         var allowedSchemes = new HashSet<string> { scheme };
         var detected       = new UrlDetector(text, UrlDetectorOptions.HTML, allowedSchemes).Detect();

         // Exercise the scheme and host accessors on every detection
         // (all schemes first, then all hosts).
         foreach (var url in detected)
         {
             url.GetScheme();
         }

         foreach (var url in detected)
         {
             url.GetHost();
         }

         Assert.Equal(2, detected.Count);
     }
 }
Esempio n. 4
0
        /// <summary>
        /// Runs URL detection over <paramref name="text"/> and asserts that the original
        /// URL strings found are exactly the <paramref name="expected"/> ones, ignoring order.
        /// </summary>
        /// <param name="text">The input text to scan.</param>
        /// <param name="options">The detector options to use for this run.</param>
        /// <param name="expected">The original URL strings the detector is expected to report.</param>
        private void RunTest(string text, UrlDetectorOptions options, params string[] expected)
        {
            // Do the detection and collect each URL as it was reported by GetOriginalUrl().
            var parser     = new UrlDetector(text, options);
            var foundArray = parser.Detect().Select(url => url.GetOriginalUrl()).ToArray();

            // Order is irrelevant, but multiplicity is not: compare as multisets.
            // A plain Except() is a set difference and ignores duplicates, so it would
            // accept e.g. expected [a, a] against found [a, b]. With equal lengths and
            // every expected value occurring equally often on both sides, the found
            // array cannot contain anything extra.
            var areSame = expected.Length == foundArray.Length
                          && expected.GroupBy(url => url)
                                     .All(group => group.Count() == foundArray.Count(url => url == group.Key));

            Assert.True(
                areSame,
                $"Expected [{string.Join(", ", expected)}] but found [{string.Join(", ", foundArray)}].");
        }
Esempio n. 5
0
        /// <summary>
        /// Extracts URLs from <paramref name="body"/> using <c>UrlDetector</c>, restricted to
        /// the http, https, ftp, ftps, ws and wss schemes.
        /// </summary>
        /// <remarks>
        /// A detected URL is kept when its host is an IPv4/IPv6 address, or when its host
        /// ends with a dot followed by one of the entries in <c>_tlds</c>. The second rule
        /// filters out file names that merely look like host names.
        /// </remarks>
        /// <param name="body">The text to scan for URLs.</param>
        /// <returns>
        /// The full URL string of every accepted detection; an empty array when nothing
        /// was detected.
        /// </returns>
        public string[] GetUrls(string body)
        {
            var detector = new UrlDetector(
                body,
                UrlDetectorOptions.QUOTE_MATCH |
                UrlDetectorOptions.SINGLE_QUOTE_MATCH |
                UrlDetectorOptions.BRACKET_MATCH |
                UrlDetectorOptions.JSON |
                UrlDetectorOptions.JAVASCRIPT |
                UrlDetectorOptions.XML |
                UrlDetectorOptions.HTML |
                UrlDetectorOptions.ALLOW_SINGLE_LEVEL_DOMAIN,
                validSchemes: new HashSet<string> { "http", "https", "ftp", "ftps", "ws", "wss" });

            var discoveredUrls = detector.Detect();

            if (discoveredUrls == null || !discoveredUrls.Any())
            {
                return Array.Empty<string>();
            }

            return discoveredUrls
                .Where(x =>
                {
                    if (x == null)
                    {
                        return false;
                    }

                    // Check to see if it's an IP address. If so, we can skip the TLD check.
                    // Even if it doesn't parse to a C# Uri, it _may_ still be valid-enough,
                    // so fall through to the TLD check in that case.
                    if (Uri.TryCreate(x.GetFullUrl(), UriKind.Absolute, out Uri? parsedUri)
                        && (parsedUri.HostNameType == UriHostNameType.IPv4
                            || parsedUri.HostNameType == UriHostNameType.IPv6))
                    {
                        return true;
                    }

                    // Fetch the host once instead of once per TLD; a missing host can
                    // never match a TLD, so reject it rather than throwing.
                    var host = x.GetHost();
                    if (string.IsNullOrEmpty(host))
                    {
                        return false;
                    }

                    // TLD check, to make sure we don't pick up files. Host names are
                    // case-insensitive and must be compared without culture rules, hence
                    // an explicit ordinal, case-insensitive comparison.
                    return _tlds.Value.Any(tld => host.EndsWith($".{tld}", StringComparison.OrdinalIgnoreCase));
                })
                .Select(x => x.GetFullUrl())
                .ToArray()!;
        }