/// <summary>
/// Collects the distinct hyperlink URLs found in a page that contain both
/// <paramref name="MustMatchString"/> and the literal text "goback".
/// </summary>
/// <param name="PageSource">Page markup; it is passed through <c>HtmlUtil.EntityDecode</c> before the links are extracted.</param>
/// <param name="MustMatchString">Substring every returned URL must contain.</param>
/// <returns>The matching URLs, without duplicates, in the order they were first encountered.</returns>
public static List<string> GettingAllUrls(string PageSource, string MustMatchString)
{
    HtmlUtil parser = new HtmlUtil();
    string decodedSource = parser.EntityDecode(PageSource);
    StringArray links = parser.GetHyperlinkedUrls(decodedSource);

    // 'seen' rejects repeats while 'matches' keeps first-occurrence order.
    HashSet<string> seen = new HashSet<string>();
    List<string> matches = new List<string>();

    for (int index = 0; index < links.Length; index++)
    {
        string candidate = links.GetString(index);

        // Skip anything that fails either substring requirement.
        if (!candidate.Contains(MustMatchString) || !candidate.Contains("goback"))
        {
            continue;
        }

        if (seen.Add(candidate))
        {
            matches.Add(candidate);
        }
    }

    return matches;
}
/// <summary>
/// Collects the distinct hyperlink URLs found in a page that contain
/// <paramref name="MustMatchString"/>.
/// </summary>
/// <param name="PageSource">Page markup; it is passed through <c>HtmlUtil.EntityDecode</c> before the links are extracted.</param>
/// <param name="MustMatchString">Substring every returned URL must contain.</param>
/// <returns>The matching URLs, without duplicates, in the order they were first encountered.</returns>
public static List<string> GettingAllUrls(string PageSource, string MustMatchString)
{
    HtmlUtil htmlUtil = new HtmlUtil();
    PageSource = htmlUtil.EntityDecode(PageSource);
    StringArray datagoogle = htmlUtil.GetHyperlinkedUrls(PageSource);

    // The filter is the MustMatchString substring only; the old, disabled
    // "goback" requirement has been removed rather than left as dead code.
    return Enumerable.Range(0, datagoogle.Length)
                     .Select(i => datagoogle.GetString(i))
                     .Where(hreflink => hreflink.Contains(MustMatchString))
                     .Distinct()
                     .ToList();
}
/// <summary>
/// Collects the distinct hyperlink URLs found in a page that contain both
/// <paramref name="MustMatchString"/> and the literal text "&amp;authType=",
/// prefixing "http://www.linkedin.com" to any link that does not already contain it.
/// Each accepted URL is also written to the log.
/// </summary>
/// <param name="PageSource">Page markup; it is passed through <c>HtmlUtil.EntityDecode</c> before the links are extracted.</param>
/// <param name="MustMatchString">Substring every accepted link must contain.</param>
/// <returns>
/// The accepted URLs without duplicates, in the order they were first encountered.
/// Extraction is best-effort: a failure is logged and whatever was gathered so far is returned.
/// </returns>
public List<string> GettingAllUrls(string PageSource, string MustMatchString)
{
    const string LinkedInBaseUrl = "http://www.linkedin.com";

    List<string> suburllist = new List<string>();

    // A null filter can never match any link, so there is nothing to scan for.
    if (MustMatchString == null)
    {
        return suburllist;
    }

    try
    {
        HtmlUtil htmlUtil = new HtmlUtil();
        PageSource = htmlUtil.EntityDecode(PageSource);
        StringArray datagoogle = htmlUtil.GetHyperlinkedUrls(PageSource);

        for (int i = 0; i < datagoogle.Length; i++)
        {
            try
            {
                string hreflink = datagoogle.GetString(i);

                // Skip missing entries and links that fail either substring requirement.
                if (hreflink == null
                    || !hreflink.Contains(MustMatchString)
                    || !hreflink.Contains("&authType="))
                {
                    continue;
                }

                // Links that do not already mention the host are treated as site-relative.
                string fullUrl = hreflink.Contains(LinkedInBaseUrl)
                    ? hreflink
                    : LinkedInBaseUrl + hreflink;

                suburllist.Add(fullUrl);
                Log("[ " + DateTime.Now + " ] => [ URL >>> " + fullUrl + " ]");
            }
            catch (Exception ex)
            {
                // One bad link must not abort the scan, but the failure is no longer hidden.
                Log("[ " + DateTime.Now + " ] => [ Error reading link " + i + " : " + ex.Message + " ]");
            }
        }
    }
    catch (Exception ex)
    {
        // Link extraction as a whole failed; report it and return what was gathered.
        Log("[ " + DateTime.Now + " ] => [ Error extracting URLs : " + ex.Message + " ]");
    }

    return suburllist.Distinct().ToList();
}