Пример #1
0
        /// <summary>
        /// Collects the distinct, non-empty URLs extracted from <paramref name="html"/>.
        /// </summary>
        /// <param name="regex">
        /// Pattern passed to <c>GetContents</c> together with the group name <c>"url"</c>.
        /// When null or empty, a default pattern matching the <c>href</c> value of
        /// <c>&lt;a&gt;</c> tags (double-quoted, single-quoted or unquoted) is used.
        /// </param>
        /// <param name="html">Markup to search.</param>
        /// <param name="baseUrl">
        /// Forwarded to <c>PageUtils.GetUrlByBaseUrl</c> for every raw match
        /// (presumably used to resolve relative links; confirm against that helper).
        /// </param>
        /// <returns>
        /// The processed URLs in the order they were first encountered, without
        /// duplicates and without null/empty entries.
        /// </returns>
        public static List<string> GetUrls(string regex, string html, string baseUrl)
        {
            if (string.IsNullOrEmpty(regex))
            {
                // Fallback: capture the href of every anchor tag into the "url" group.
                regex = "<a\\s*.*?href\\s*=\\s*(?:\"(?<url>[^\"]*)\"|'(?<url>[^']*)'|(?<url>[^>\\s]*)).*?>";
            }

            var list = new List<string>();
            // Tracks URLs already emitted so the duplicate check is O(1) instead of
            // a linear List.Contains scan per match; the list keeps first-seen order.
            var seen = new HashSet<string>();

            foreach (var rawUrl in GetContents("url", regex, html))
            {
                var url = PageUtils.GetUrlByBaseUrl(rawUrl, baseUrl);
                if (!string.IsNullOrEmpty(url) && seen.Add(url))
                {
                    list.Add(url);
                }
            }

            return list;
        }
Пример #2
0
 /// <summary>
 /// Extracts a single value from <paramref name="html"/> via <c>GetContent</c>
 /// using the group name <c>"url"</c>, then passes it through
 /// <c>PageUtils.GetUrlByBaseUrl</c> with <paramref name="baseUrl"/>.
 /// </summary>
 /// <param name="regex">Pattern handed to <c>GetContent</c> unchanged (no default is substituted here).</param>
 /// <param name="html">Markup to search.</param>
 /// <param name="baseUrl">Forwarded to <c>PageUtils.GetUrlByBaseUrl</c>.</param>
 /// <returns>Whatever <c>PageUtils.GetUrlByBaseUrl</c> returns for the extracted value.</returns>
 public static string GetUrl(string regex, string html, string baseUrl)
 {
     var rawUrl = GetContent("url", regex, html);
     var resolvedUrl = PageUtils.GetUrlByBaseUrl(rawUrl, baseUrl);
     return resolvedUrl;
 }