// Smoke test: runs detection with default options over text that contains
// "attachment://hereclickonthis.exe" and materializes the full URL of every
// result. Nothing is asserted; the test passes as long as nothing throws.
public void TestStuff2()
{
    var sample = @" some new text attachment://hereclickonthis.exe now see? ";

    var detector = new UrlDetector(sample, UrlDetectorOptions.Default);
    var detected = detector.Detect();

    // ToList() forces GetFullUrl() to be evaluated for each detected URL.
    var fullUrls = detected.Select(found => found.GetFullUrl()).ToList();
}
// Runs detection (Default | HTML options) over text holding a "mailTo:" and a
// "mailto://" reference and expects exactly two URLs to be reported.
public void TestMailTo()
{
    var text = $" mailTo:dale:[email protected] and then mailto://[email protected]";
    var options = UrlDetectorOptions.Default | UrlDetectorOptions.HTML;

    var detector = new UrlDetector(text, options);
    var detected = detector.Detect();

    // Exercise the scheme accessor on every result, then the host accessor;
    // the return values are intentionally ignored.
    foreach (var url in detected)
    {
        url.GetScheme();
    }

    foreach (var url in detected)
    {
        url.GetHost();
    }

    Assert.Equal(2, detected.Count);
}
// For every scheme name exposed by UriSchemeLookup.UriSchemeNames, embeds two
// URLs in noisy text (one with a literal ':' after the scheme, one with the
// percent-encoded "%3a") and expects the detector, restricted to that single
// scheme and using the HTML option, to report exactly two URLs.
public void TestUriSchemeLocators()
{
    foreach (var schemeName in UriSchemeLookup.UriSchemeNames)
    {
        var urlToFind1 = $"{schemeName}://mytestsite.com";
        var urlToFind2 = $"{schemeName}%3a//othersite.org";
        var haystack = $"did we @>> << !!://JK find #4jadsfj the url: {urlToFind1} and this one too {urlToFind2} ?";

        // Only the scheme under test is considered valid for this run.
        var allowedSchemes = new HashSet<string> { schemeName };
        var detector = new UrlDetector(haystack, UrlDetectorOptions.HTML, allowedSchemes);
        var detected = detector.Detect();

        // Exercise the scheme accessor on every result, then the host accessor;
        // the return values are intentionally ignored.
        foreach (var url in detected)
        {
            url.GetScheme();
        }

        foreach (var url in detected)
        {
            url.GetHost();
        }

        Assert.Equal(2, detected.Count);
    }
}
/// <summary>
/// Runs URL detection over <paramref name="text"/> and asserts that the original
/// text of the detected URLs matches <paramref name="expected"/> exactly,
/// ignoring order but respecting how many times each value occurs.
/// </summary>
/// <param name="text">The input text to scan.</param>
/// <param name="options">The detector options to scan with.</param>
/// <param name="expected">The URLs (as returned by <c>GetOriginalUrl()</c>) that must be found.</param>
private void RunTest(string text, UrlDetectorOptions options, params string[] expected)
{
    // Do the detection.
    var parser = new UrlDetector(text, options);
    var found = parser.Detect();

    // Ordering is irrelevant, so bring both sides into the same ordinal order
    // before comparing. A set difference (Enumerable.Except) is deliberately not
    // used: it collapses duplicates, so expected {a, a} would wrongly match
    // found {a, b}. Ordinal sorting keeps the order deterministic and
    // independent of the current culture.
    var expectedSorted = expected
        .OrderBy(url => url, System.StringComparer.Ordinal)
        .ToArray();
    var foundSorted = found
        .Select(url => url.GetOriginalUrl())
        .OrderBy(url => url, System.StringComparer.Ordinal)
        .ToArray();

    // Sequence equality reports the first mismatching element on failure.
    Assert.Equal(expectedSorted, foundSorted);
}
/// <summary>
/// Extracts the URLs found in <paramref name="body"/>.
/// </summary>
/// <remarks>
/// Detection is limited to the http, https, ftp, ftps, ws and wss schemes. A
/// detected URL is kept when it parses as an absolute URI with an IPv4 or IPv6
/// host, or when its host ends with "." followed by one of the entries in
/// <c>_tlds.Value</c>; everything else is dropped so that file names are not
/// reported as URLs.
/// </remarks>
/// <param name="body">The text to scan.</param>
/// <returns>The full URLs that passed the filter; an empty array when none were detected.</returns>
public string[] GetUrls(string body)
{
    var detector = new UrlDetector(
        body,
        UrlDetectorOptions.QUOTE_MATCH
        | UrlDetectorOptions.SINGLE_QUOTE_MATCH
        | UrlDetectorOptions.BRACKET_MATCH
        | UrlDetectorOptions.JSON
        | UrlDetectorOptions.JAVASCRIPT
        | UrlDetectorOptions.XML
        | UrlDetectorOptions.HTML
        | UrlDetectorOptions.ALLOW_SINGLE_LEVEL_DOMAIN,
        validSchemes: new HashSet<string> { "http", "https", "ftp", "ftps", "ws", "wss" });

    var discoveredUrls = detector.Detect();
    if (discoveredUrls == null || !discoveredUrls.Any())
    {
        return Array.Empty<string>();
    }

    return discoveredUrls
        .Where(x =>
        {
            if (x == null)
            {
                return false;
            }

            // Check to see if it's an IP address. If so, we can skip the TLD check.
            // Even if it doesn't parse to a C# Uri, it _may_ still be valid-enough,
            // so fall through to the TLD check in that case.
            if (Uri.TryCreate(x.GetFullUrl(), UriKind.Absolute, out Uri? parsedUri)
                && (parsedUri.HostNameType == UriHostNameType.IPv4
                    || parsedUri.HostNameType == UriHostNameType.IPv6))
            {
                return true;
            }

            // Read the host once instead of once per TLD candidate.
            var host = x.GetHost();
            if (string.IsNullOrEmpty(host))
            {
                // No host means there is nothing to match a TLD against.
                return false;
            }

            // TLD check, to make sure we don't pick up files. Host names are
            // case-insensitive identifiers, so compare ordinally and ignore case
            // rather than relying on the current culture.
            return _tlds.Value.Any(tld => host.EndsWith($".{tld}", StringComparison.OrdinalIgnoreCase));
        })
        .Select(x => x.GetFullUrl())
        // NOTE(review): the null-forgiving operator is kept from the original;
        // presumably it silences a nullability warning on GetFullUrl() — confirm.
        .ToArray()!;
}