/// <summary>
/// Collects links from a page and from the external scripts it references,
/// as long as those scripts are hosted on the same domain as the page.
/// </summary>
/// <param name="sContent">Markup of the page to scan.</param>
/// <param name="sUrl">
/// URL of the page; used to resolve each script <c>src</c> value and to
/// determine the page's own domain.
/// </param>
/// <param name="lisDes">
/// Forwarded by reference to <c>GetLinksFromRss</c> when no links were found;
/// this method does not read or write it otherwise.
/// </param>
/// <returns>
/// The dictionary populated by <c>smethod_0</c>, or the result of
/// <c>GetLinksFromRss</c> when that dictionary ended up empty.
/// </returns>
public static Dictionary<string, string> GetLinks(string sContent, string sUrl, ref Dictionary<string, string> lisDes)
{
    Dictionary<string, string> links = new Dictionary<string, string>();
    smethod_0(sContent, sUrl, ref links);

    string pageDomain = CRegex.GetDomain(sUrl);

    // Locate <script ... src=...> tags; the src value may be single-quoted,
    // double-quoted or bare.
    Regex scriptTag = new Regex(
        "<script[^>]+src\\s*=\\s*(?:'(?<src>[^']+)'|\"(?<src>[^\"]+)\"|(?<src>[^>\\s]+))\\s*[^>]*>",
        RegexOptions.IgnorePatternWhitespace | RegexOptions.Multiline | RegexOptions.IgnoreCase);
    MatchCollection scripts = scriptTag.Matches(sContent);

    // Walk the matches from last to first, exactly as the original loop did,
    // so the order in which scripts feed the dictionary is unchanged.
    for (int i = scripts.Count - 1; i >= 0; i--)
    {
        string scriptUrl = CRegex.GetUrl(sUrl, scripts[i].Groups["src"].Value);

        // Domain names are identifiers, not linguistic text: compare them
        // ordinally and case-insensitively instead of lower-casing with the
        // current culture and using the culture-sensitive CompareTo.
        bool sameDomain = string.Equals(
            pageDomain,
            CRegex.GetDomain(scriptUrl),
            System.StringComparison.OrdinalIgnoreCase);
        if (!sameDomain)
        {
            continue;
        }

        string scriptBody = CSocket.GetHtmlByUrl(scriptUrl);

        // Skip scripts that could not be downloaded (null) or are empty.
        if (string.IsNullOrEmpty(scriptBody))
        {
            continue;
        }

        smethod_0(scriptBody, scriptUrl, ref links);
    }

    // Nothing found in the markup or its scripts: fall back to the RSS extractor.
    if (links.Count == 0)
    {
        return GetLinksFromRss(sContent, sUrl, ref lisDes);
    }

    return links;
}
/// <summary>
/// Extracts links from the given page content, then from every external
/// script the page references on its own domain; falls back to
/// <c>GetLinksFromRss</c> when nothing was extracted.
/// </summary>
/// <param name="sContent">Page markup to scan for links and script tags.</param>
/// <param name="sUrl">
/// Address of the page; script <c>src</c> values are resolved against it and
/// its domain is the one scripts must share to be followed.
/// </param>
/// <param name="lisDes">
/// Passed by reference to <c>GetLinksFromRss</c> in the fallback case only.
/// </param>
/// <returns>
/// The links gathered by <c>_GetLinks</c>, or whatever
/// <c>GetLinksFromRss</c> returns if none were gathered.
/// </returns>
public static Dictionary<string, string> GetLinks(string sContent, string sUrl, ref Dictionary<string, string> lisDes)
{
    Dictionary<string, string> lisA = new Dictionary<string, string>();
    _GetLinks(sContent, sUrl, ref lisA);

    string domain = CRegex.GetDomain(sUrl);

    // Pick up links that are emitted by external scripts.
    Regex re = new Regex(
        @"<script[^>]+src\s*=\s*(?:'(?<src>[^']+)'|""(?<src>[^""]+)""|(?<src>[^>\s]+))\s*[^>]*>",
        RegexOptions.Multiline | RegexOptions.IgnorePatternWhitespace | RegexOptions.IgnoreCase);
    MatchCollection mcs = re.Matches(sContent);

    // Matches are deliberately visited in reverse document order (kept from
    // the original implementation).
    for (int i = mcs.Count - 1; i >= 0; i--)
    {
        Match mc = mcs[i];
        string subUrl = CRegex.GetUrl(sUrl, mc.Groups["src"].Value);
        string subDomain = CRegex.GetDomain(subUrl);

        // Only scripts hosted on the same domain are mined for links. The
        // comparison is ordinal/case-insensitive because a host name is an
        // identifier; culture-sensitive lower-casing plus CompareTo can give
        // different answers depending on the current culture.
        if (string.Equals(domain, subDomain, System.StringComparison.OrdinalIgnoreCase))
        {
            string subContent = CSocket.GetHtmlByUrl(subUrl);

            // A null or empty download has nothing to extract.
            if (!string.IsNullOrEmpty(subContent))
            {
                _GetLinks(subContent, subUrl, ref lisA);
            }
        }
    }

    // No links found at all: try treating the content as an RSS feed instead.
    return lisA.Count == 0
        ? GetLinksFromRss(sContent, sUrl, ref lisDes)
        : lisA;
}