Beispiel #1
0
        /// <summary>
        /// Collects the rel, href and type attributes of every <c>link</c> element
        /// found in the supplied document.
        /// </summary>
        /// <remarks>
        /// The XPath query used is <c>//link</c>, which is not restricted to the page
        /// head: it matches <c>link</c> elements anywhere in the document.
        /// </remarks>
        /// <param name="p_htmlDocDocument">Parsed HTML document to scan for link elements.</param>
        /// <returns>
        /// One <see cref="PageMetaLink"/> per matched element, in the order the query
        /// returned them; an empty list when the query yields no nodes.
        /// </returns>
        private static List <PageMetaLink> GetPageMetaLinkInfo(HtmlDocument p_htmlDocDocument)
        {
            List <PageMetaLink> lstLinks = new List <PageMetaLink>();

            // Evaluate the XPath query once and reuse the result for both the
            // null check and the iteration (the result is an HtmlNodeCollection).
            var linkNodes = p_htmlDocDocument.DocumentNode.SelectNodes("//link");

            // SelectNodes yields null rather than an empty collection when nothing
            // matches, so bail out early with the empty list.
            if (linkNodes == null)
            {
                return lstLinks;
            }

            foreach (HtmlNode hnLink in linkNodes)
            {
                PageMetaLink pmlEntry = new PageMetaLink();

                pmlEntry.Rel  = GetHtmlAttributeValue(hnLink.Attributes, "rel");
                pmlEntry.Href = GetHtmlAttributeValue(hnLink.Attributes, "href");
                pmlEntry.Type = GetHtmlAttributeValue(hnLink.Attributes, "type");

                lstLinks.Add(pmlEntry);
            }

            return lstLinks;
        }
Beispiel #2
0
        /// <summary>
        /// Extracts link tag info (rel, type, href) from raw page HTML using a
        /// regular expression. Deprecated.
        /// </summary>
        /// <remarks>
        /// Each match of the pattern runs from a <c>&lt;link</c> through the following
        /// <c>&lt;/head&gt;</c>, and matches do not overlap, so no more than one link
        /// tag is captured per <c>&lt;/head&gt;</c> in the input. The optional rel,
        /// type and href groups appear in that fixed order in the pattern, so tags
        /// whose attributes are ordered differently may not be captured as intended.
        /// </remarks>
        /// <param name="p_strPageHtmlContent">Raw HTML text of the page.</param>
        /// <returns>
        /// The captured link tags, each added under the rel value captured for it
        /// (an empty string when the rel group did not participate in the match).
        /// </returns>
        private static Dict <string, PageMetaLink> GetPageMetaLinkInfo(string p_strPageHtmlContent)
        {
            //   Try grabbing the meta info of the page into a dictionary
            const string strLinkPattern = "<link.+?(?:rel=(?:\"|')(.*?)(?:\"|').*?)?(?:type=(?:\"|')(.*?)(?:\"|').*?)?(?:href=(?:\"|')(.*?)(?:\"|'))?/?>.*?</head>";

            // Singleline lets '.' cross line breaks so the trailing ".*?</head>" can
            // span the rest of the head; IgnoreCase accepts upper-case markup.
            RegexOptions rxoOptions = RegexOptions.IgnoreCase | RegexOptions.Singleline;

            Dict <string, PageMetaLink> dictLinksByRel = new Dict <string, PageMetaLink>();

            foreach (Match mchLink in Regex.Matches(p_strPageHtmlContent, strLinkPattern, rxoOptions))
            {
                // Groups: 1 = rel, 2 = type, 3 = href.
                string strRel = mchLink.Groups[1].Value;

                PageMetaLink pmlLinkTag = new PageMetaLink();
                pmlLinkTag.Rel  = strRel;
                pmlLinkTag.Type = mchLink.Groups[2].Value;
                pmlLinkTag.Href = mchLink.Groups[3].Value;

                dictLinksByRel.Add(strRel, pmlLinkTag);
            }

            return dictLinksByRel;
        }