/// <summary>
/// Extracts link tag info (rel, type, href) from raw page HTML using a regular expression.
/// </summary>
/// <remarks>
/// The original documentation of this overload labelled it "Deprecated".
/// </remarks>
/// <param name="p_strPageHtmlContent">Raw HTML content of the page to scan.</param>
/// <returns>
/// The matched link entries, keyed by the captured rel value of each match.
/// </returns>
private static Dict<string, PageMetaLink> GetPageMetaLinkInfo(string p_strPageHtmlContent)
{
    Dict<string, PageMetaLink> dictReturnSet = new Dict<string, PageMetaLink>();

    // Capture groups: 1 = rel, 2 = type, 3 = href. Each attribute group is optional,
    // so a group that did not participate in the match yields an empty string.
    // NOTE(review): every match also consumes the text up to the following "</head>",
    // so at most one link is captured per "</head>" in the input - confirm whether
    // that is intended before relying on this overload.
    string pattern = "<link.+?(?:rel=(?:\"|')(.*?)(?:\"|').*?)?(?:type=(?:\"|')(.*?)(?:\"|').*?)?(?:href=(?:\"|')(.*?)(?:\"|'))?/?>.*?</head>";
    RegexOptions rxoOptions = RegexOptions.IgnoreCase | RegexOptions.Singleline;

    foreach (Match match in Regex.Matches(p_strPageHtmlContent, pattern, rxoOptions))
    {
        string strRel = match.Groups[1].Value;

        // A fresh entry per match; no instance is allocated outside the loop anymore,
        // because the previous pre-loop instance was always overwritten unread.
        PageMetaLink pmlLinkTag = new PageMetaLink();
        pmlLinkTag.Rel = strRel;
        pmlLinkTag.Type = match.Groups[2].Value;
        pmlLinkTag.Href = match.Groups[3].Value;

        // NOTE(review): presumably Dict.Add rejects duplicate keys the way
        // Dictionary.Add does - verify against the Dict implementation.
        dictReturnSet.Add(strRel, pmlLinkTag);
    }

    return dictReturnSet;
}
/// <summary>
/// Collects the rel, href and type attributes of every <c>link</c> element
/// selected by the XPath query <c>//link</c> on the given document.
/// </summary>
/// <remarks>
/// The query is not restricted to the head element; it selects <c>link</c>
/// elements anywhere under the document node.
/// </remarks>
/// <param name="p_htmlDocDocument">Parsed HTML document to search.</param>
/// <returns>
/// One <see cref="PageMetaLink"/> per selected node, in the order the query yields them;
/// an empty list when the query returns no node collection.
/// </returns>
private static List<PageMetaLink> GetPageMetaLinkInfo(HtmlDocument p_htmlDocDocument)
{
    List<PageMetaLink> lstLink = new List<PageMetaLink>();

    // Evaluate the XPath query once and reuse the result for both the
    // null check and the iteration.
    var linkNodes = p_htmlDocDocument.DocumentNode.SelectNodes("//link");
    if (linkNodes == null)
    {
        // No node collection came back: nothing to report.
        return lstLink;
    }

    foreach (HtmlNode hnItem in linkNodes)
    {
        PageMetaLink pmlLink = new PageMetaLink();
        pmlLink.Rel = GetHtmlAttributeValue(hnItem.Attributes, "rel");
        pmlLink.Href = GetHtmlAttributeValue(hnItem.Attributes, "href");
        pmlLink.Type = GetHtmlAttributeValue(hnItem.Attributes, "type");
        lstLink.Add(pmlLink);
    }

    return lstLink;
}