示例#1
0
        /// <summary>
        /// Extracts the rel, type and href attributes of the &lt;link&gt; tags that appear
        /// before a closing &lt;/head&gt; tag, using regular expressions.
        /// </summary>
        /// <remarks>
        /// Deprecated. Every tag is matched on its own and each attribute is read independently,
        /// so attribute order and whitespace before the end of the tag do not matter.
        /// Only quoted attribute values are recognised.
        /// </remarks>
        /// <param name="p_strPageHtmlContent">Raw HTML markup of the page.</param>
        /// <returns>
        /// Link information keyed by the rel value (empty string when the tag has no rel attribute).
        /// When several tags share the same rel value (compared case-insensitively) only the first
        /// one is kept. The result is empty when the markup contains no &lt;/head&gt; tag.
        /// </returns>
        /// <exception cref="System.ArgumentNullException">
        /// Thrown when <paramref name="p_strPageHtmlContent"/> is null.
        /// </exception>
        private static Dict<string, PageMetaLink> GetPageMetaLinkInfo(string p_strPageHtmlContent)
        {
            if (p_strPageHtmlContent == null)
            {
                throw new System.ArgumentNullException("p_strPageHtmlContent");
            }

            Dict<string, PageMetaLink> dictReturnSet = new Dict<string, PageMetaLink>();

            // Only tags that are followed by a closing </head> are considered. Cutting the input
            // there (instead of matching "</head>" as part of every tag) lets ALL such tags be
            // found rather than just the first one.
            int intHeadEnd = p_strPageHtmlContent.LastIndexOf("</head>", System.StringComparison.OrdinalIgnoreCase);
            if (intHeadEnd < 0)
            {
                return dictReturnSet;
            }

            string strHeadContent = p_strPageHtmlContent.Substring(0, intHeadEnd);
            RegexOptions rxoOptions = RegexOptions.IgnoreCase | RegexOptions.Singleline;

            // The result is keyed by rel, so it can hold one entry per rel value. Tracking the keys
            // here guarantees Add is never called twice with the same key.
            System.Collections.Generic.HashSet<string> setSeenRels =
                new System.Collections.Generic.HashSet<string>(System.StringComparer.OrdinalIgnoreCase);

            foreach (Match match in Regex.Matches(strHeadContent, @"<link\b[^>]*>", rxoOptions))
            {
                string strTag = match.Value;
                string strRel = GetQuotedAttributeValue(strTag, "rel");

                if (!setSeenRels.Add(strRel))
                {
                    // A tag with this rel value has already been recorded; first one wins.
                    continue;
                }

                PageMetaLink pmlLinkTag = new PageMetaLink();
                pmlLinkTag.Rel = strRel;
                pmlLinkTag.Type = GetQuotedAttributeValue(strTag, "type");
                pmlLinkTag.Href = GetQuotedAttributeValue(strTag, "href");
                dictReturnSet.Add(strRel, pmlLinkTag);
            }

            return dictReturnSet;
        }

        /// <summary>
        /// Reads the value of a single- or double-quoted attribute from the text of one HTML tag.
        /// </summary>
        /// <param name="p_strTagHtml">Text of a single tag, e.g. <c>&lt;link rel="icon"&gt;</c>.</param>
        /// <param name="p_strAttributeName">Attribute name to look for (matched case-insensitively).</param>
        /// <returns>The attribute value without its quotes, or an empty string when it is not present.</returns>
        private static string GetQuotedAttributeValue(string p_strTagHtml, string p_strAttributeName)
        {
            // The leading \s keeps e.g. "data-rel" from matching "rel"; \1 requires the closing
            // quote to be the same character as the opening one.
            string strPattern = @"\s" + Regex.Escape(p_strAttributeName) + @"\s*=\s*([""'])(.*?)\1";
            Match match = Regex.Match(p_strTagHtml, strPattern, RegexOptions.IgnoreCase | RegexOptions.Singleline);

            // Groups[2].Value is an empty string when the match failed.
            return match.Groups[2].Value;
        }
示例#2
0
        /// <summary>
        /// Collects the rel, href and type attributes of every &lt;link&gt; element in the document.
        /// </summary>
        /// <remarks>
        /// The XPath query <c>//link</c> selects link elements anywhere in the document,
        /// not only those inside the head element.
        /// </remarks>
        /// <param name="p_htmlDocDocument">Parsed HTML document to scan.</param>
        /// <returns>
        /// One <see cref="PageMetaLink"/> per link element, in the order the query returns them;
        /// an empty list when the query yields no nodes.
        /// </returns>
        private static List<PageMetaLink> GetPageMetaLinkInfo(HtmlDocument p_htmlDocDocument)
        {
            List<PageMetaLink> lstLink = new List<PageMetaLink>();

            // Run the XPath query once and reuse the result for both the null check and the loop.
            var hncLinkNodes = p_htmlDocDocument.DocumentNode.SelectNodes("//link");
            if (hncLinkNodes == null)
            {
                // The query produced no node collection, so there is nothing to report.
                return lstLink;
            }

            foreach (HtmlNode hnItem in hncLinkNodes)
            {
                PageMetaLink pmlLink = new PageMetaLink();
                pmlLink.Rel = GetHtmlAttributeValue(hnItem.Attributes, "rel");
                pmlLink.Href = GetHtmlAttributeValue(hnItem.Attributes, "href");
                pmlLink.Type = GetHtmlAttributeValue(hnItem.Attributes, "type");
                lstLink.Add(pmlLink);
            }

            return lstLink;
        }