/// <summary>
/// Parses a fetched HTML page, collects every link that matches the pattern held in
/// <c>m_strCnblogsUrlFilterRule</c>, logs each match (link text first, then the link),
/// and passes the matched links to <c>CheckArticles</c>.
/// </summary>
/// <param name="strVisitUrl">
/// URL the page was fetched from. The current implementation does not use it (the
/// previously computed base URL was never read); the parameter is kept so existing
/// callers keep working. Note that the URL is therefore no longer validated here.
/// </param>
/// <param name="strReturnPage">Raw HTML of the fetched page.</param>
/// <returns>
/// The value returned by <c>CheckArticles</c> for the matched links
/// (presumably <c>true</c> when no new article was found - confirm against CheckArticles).
/// </returns>
protected bool TakeUrls(string strVisitUrl, string strReturnPage) {
    HtmlAgilityPack.HtmlDocument htmlDoc = new HtmlAgilityPack.HtmlDocument {
        OptionAddDebuggingAttributes = false,
        OptionAutoCloseOnEnd = true,
        OptionFixNestedTags = true,
        OptionReadEncoding = true
    };
    htmlDoc.LoadHtml(strReturnPage);

    DocumentWithLinks links = htmlDoc.GetLinks();
    List<string> lstThisTimesUrls = new List<string>();

    // Union() removes duplicates across the two link sources, so each distinct link is handled once.
    foreach (string link in links.Links.Union(links.References)) {
        if (string.IsNullOrEmpty(link)) {
            continue;
        }

        // Only a yes/no answer is needed, so IsMatch replaces enumerating every match.
        if (!Regex.IsMatch(link, m_strCnblogsUrlFilterRule, RegexOptions.Singleline)) {
            continue;
        }

        // Single lookup instead of Keys.Contains + indexer; a missing entry logs as an empty line.
        string strLinkText;
        if (!links.m_dicLink2Text.TryGetValue(link, out strLinkText)) {
            strLinkText = "";
        }

        PrintLog(strLinkText + "\n");
        PrintLog(link + "\n");
        lstThisTimesUrls.Add(link);
    }

    return CheckArticles(lstThisTimesUrls);
}