/// <summary>
/// Collects the distinct, non-empty URLs captured from <paramref name="html"/>.
/// </summary>
/// <param name="regex">
/// Pattern handed to <c>GetContents</c>; the values of its <c>url</c> named group are used.
/// When null or empty, a built-in pattern targeting anchor-tag <c>href</c> attributes
/// (double-quoted, single-quoted or unquoted) is used instead.
/// </param>
/// <param name="html">Text to search.</param>
/// <param name="baseUrl">
/// Passed to <c>PageUtils.GetUrlByBaseUrl</c> together with each captured value
/// (presumably to resolve relative links; see that helper for the exact rules).
/// </param>
/// <returns>
/// The values returned by <c>PageUtils.GetUrlByBaseUrl</c>, with null/empty results and
/// duplicates dropped, in order of first occurrence. Never null.
/// </returns>
public static List<string> GetUrls(string regex, string html, string baseUrl)
{
    if (string.IsNullOrEmpty(regex))
    {
        // Default pattern: the "url" group is declared once per quoting style
        // ("...", '...', or bare), so a single group name covers all three forms.
        regex = "<a\\s*.*?href\\s*=\\s*(?:\"(?<url>[^\"]*)\"|'(?<url>[^']*)'|(?<url>[^>\\s]*)).*?>";
    }

    var list = new List<string>();

    // Tracks URLs already emitted so de-duplication is O(1) per item instead of
    // a linear List.Contains scan; the list itself preserves first-seen order.
    var seen = new HashSet<string>();

    foreach (var rawUrl in GetContents("url", regex, html))
    {
        var url = PageUtils.GetUrlByBaseUrl(rawUrl, baseUrl);

        // HashSet.Add returns false when the value is already present.
        if (!string.IsNullOrEmpty(url) && seen.Add(url))
        {
            list.Add(url);
        }
    }

    return list;
}
/// <summary>
/// Extracts a single URL from <paramref name="html"/>.
/// </summary>
/// <param name="regex">
/// Pattern handed to <c>GetContent</c>; the value of its <c>url</c> named group is used.
/// It is passed through as-is (no default pattern is substituted here).
/// </param>
/// <param name="html">Text to search.</param>
/// <param name="baseUrl">Passed to <c>PageUtils.GetUrlByBaseUrl</c> along with the captured value.</param>
/// <returns>Whatever <c>PageUtils.GetUrlByBaseUrl</c> returns for the captured value.</returns>
public static string GetUrl(string regex, string html, string baseUrl)
{
    var rawUrl = GetContent("url", regex, html);
    return PageUtils.GetUrlByBaseUrl(rawUrl, baseUrl);
}