/// <summary>
/// Builds one <see cref="SingleDomain"/> per distinct host found in <paramref name="input"/>.
/// Subdomains are kept; no blocked-domain or top-level-domain filtering is applied here.
/// </summary>
/// <param name="input">Text handed to <c>GetUris</c> to obtain the URIs to inspect.</param>
/// <param name="source">Value stored in <c>DomainSource</c> of every result.</param>
/// <param name="keyword">Value stored in <c>Keyword</c> of every result.</param>
/// <returns>
/// The parsed domains in the order their hosts first appear; each address occurs at most once
/// (compared case-insensitively).
/// </returns>
public static List<SingleDomain> ParseSubdomainUnfiltered(string input, string source, string keyword)
{
    const string wwwPrefix = "www.";

    var domains = new List<SingleDomain>();
    // Exact-match set of addresses already emitted. A substring test would wrongly drop
    // "notexample.com" or "a.example.com" once "example.com" had been seen.
    var seenAddresses = new HashSet<string>(StringComparer.OrdinalIgnoreCase);

    foreach (var uri in GetUris(input))
    {
        // Strip only a leading "www."; replacing it everywhere would corrupt hosts such as
        // "awww.example.com".
        var host = uri.Host;
        var address = host.StartsWith(wwwPrefix, StringComparison.Ordinal)
            ? host.Substring(wwwPrefix.Length)
            : host;

        if (!seenAddresses.Add(address))
        {
            continue; // already recorded
        }

        // Everything before the last label is the domain name; the last label is the TLD.
        var addressParts = address.Split('.');
        var domain = addressParts.Take(addressParts.Length - 1).JoinStrings(".");
        var topLevelDomain = addressParts.Last();

        domains.Add(new SingleDomain
        {
            DomainName = domain,
            Keyword = keyword,
            TopLevelDomainName = topLevelDomain,
            Address = address,
            FullAddress = uri.OriginalString,
            DomainSource = source
        });
    }

    return domains;
}
/// <summary>
/// Extracts distinct, non-blocked domains from the matches that <c>GetMatches</c> finds in
/// <paramref name="input"/>.
/// </summary>
/// <remarks>
/// A match is skipped when it is not an absolute URI on its scheme's default port, when its host
/// (after removing a leading "www.") has more than two dot-separated labels, when its last label
/// is "gov", "edu" or "mil", when the address contains any entry of <c>_blockedDomains</c>, or
/// when the same address was already produced.
/// </remarks>
/// <param name="input">Text handed to <c>GetMatches</c>.</param>
/// <param name="source">Value stored in <c>DomainSource</c> of every result.</param>
/// <param name="keyword">Value stored in <c>Keyword</c> of every result.</param>
/// <returns>The accepted domains in order of first appearance.</returns>
public static List<SingleDomain> Parse(string input, string source, string keyword)
{
    const string wwwPrefix = "www.";

    var matches = GetMatches(input);
    var domains = new List<SingleDomain>();
    // Exact-match set of addresses already emitted. A substring test would wrongly treat
    // "myexample.com" as a duplicate of "example.com".
    var seenAddresses = new HashSet<string>(StringComparer.OrdinalIgnoreCase);

    foreach (var match in matches)
    {
        Uri uri;
        if (!Uri.TryCreate(match.Value, UriKind.Absolute, out uri) || !uri.IsDefaultPort)
        {
            continue;
        }

        // Strip only a leading "www."; replacing it everywhere would corrupt hosts such as
        // "awww.com" (which used to collapse to "acom").
        var host = uri.Host;
        var address = host.StartsWith(wwwPrefix, StringComparison.Ordinal)
            ? host.Substring(wwwPrefix.Length)
            : host;

        var addressParts = address.Split('.');
        if (addressParts.Length > 2)
        {
            continue; // no subdomains
        }

        var domain = addressParts.First();
        var topLevelDomain = addressParts.Last();
        if (topLevelDomain == "gov" || topLevelDomain == "edu" || topLevelDomain == "mil")
        {
            continue;
        }

        // Blocked entries are matched as substrings of the address, as before.
        if (_blockedDomains.Any(x => address.Contains(x)))
        {
            continue;
        }

        // Only remember addresses that pass every filter, so the set mirrors the result list.
        if (!seenAddresses.Add(address))
        {
            continue;
        }

        domains.Add(new SingleDomain
        {
            DomainName = domain,
            Keyword = keyword,
            TopLevelDomainName = topLevelDomain,
            Address = address,
            FullAddress = uri.OriginalString,
            DomainSource = source,
        });
    }

    return domains;
}
/// <summary>
/// Reports whether two domains have the same <c>Address</c>, ignoring case.
/// </summary>
/// <remarks>
/// Uses an ordinal comparison: addresses are identifiers rather than linguistic text, and a
/// culture-aware comparison can treat strings that differ only by ignorable characters as equal.
/// </remarks>
/// <param name="x">The first domain.</param>
/// <param name="y">The domain to compare against.</param>
/// <returns><c>true</c> when both addresses match case-insensitively; otherwise <c>false</c>.</returns>
public static bool Isomorphic(this SingleDomain x, SingleDomain y) =>
    x.Address.Equals(y.Address, StringComparison.OrdinalIgnoreCase);