/// <summary>Parses the hyperlinks found in an HTML fragment.</summary>
/// <param name="html">HTML text to scan. Null or empty input yields an empty array.</param>
/// <param name="baseurl">
/// Base URL used to expand each link into a full URL. When null or empty, only hrefs
/// that are already absolute can be normalized; other hrefs keep their trimmed text.
/// </param>
/// <param name="filter">
/// Optional basic filter. It receives each link with <c>Url</c> and <c>RawUrl</c> set to the
/// raw href value; links for which it returns false are skipped.
/// </param>
/// <returns>The links that passed filtering, in the order they were matched; never null.</returns>
public static Link[] Parse(String html, String baseurl = null, Func<Link, Boolean> filter = null)
{
    // Nothing to scan; also avoids handing null to the regex engine.
    if (String.IsNullOrEmpty(html)) return new Link[0];

    // baseurl is optional (its default is null), so only build the base Uri when one was supplied.
    Uri buri = null;
    if (!String.IsNullOrEmpty(baseurl)) buri = new Uri(baseurl);

    // Analyze all links
    var list = new List<Link>();
    foreach (Match item in _regA.Matches(html))
    {
        var link = new Link();
        link.Html = item.Value;
        link.Name = item.Groups["名称"].Value.Trim();
        link.Url = item.Groups["链接"].Value.Trim();
        link.RawUrl = link.Url;

        // Filter
        if (filter != null && !filter(link)) continue;

        // Skip links that are empty once leading '#' is removed, and javascript: pseudo-links.
        link.Url = link.Url.TrimStart("#");
        if (String.IsNullOrEmpty(link.Url)) continue;
        if (link.Url.StartsWithIgnoreCase("javascript:")) continue;

        // Analyze the title: look in the first attribute group, falling back to the second
        // when the first is blank or contains no title.
        var txt = item.Groups["其它1"].Value.Trim();
        if (txt.IsNullOrWhiteSpace() || !_regTitle.IsMatch(txt)) txt = item.Groups["其它2"].Value.Trim();
        var mc = _regTitle.Match(txt);
        if (mc.Success)
        {
            link.Title = mc.Groups["标题"].Value.Trim();
        }

        // Complete the download address. TryCreate is used so that a single malformed href
        // (or a missing base URL) does not abort the whole parse; in that case the trimmed
        // link text is kept as-is.
        Uri uri;
        if (Uri.TryCreate(buri, link.RawUrl, out uri)) link.Url = uri.ToString();

        // Split the name and compute the trailing time, yyyyMMddHHmmss
        link.ParseTime();

        // Split the version, _v1.0.0.0
        link.ParseVersion();

        list.Add(link);
    }

    return list.ToArray();
}