Esempio n. 1
0
        /// <summary>
        /// Extracts the meta tags that appear before the closing <c>&lt;/head&gt;</c> tag
        /// of an HTML page using a regular expression.
        /// </summary>
        /// <remarks>
        /// Marked as deprecated by the original author.
        /// The pattern only captures attributes written in the order
        /// <c>name</c>, <c>property</c>, <c>content</c>; attributes that are absent,
        /// or that appear out of that order, may be left as empty strings.
        /// </remarks>
        /// <param name="p_strPageHtmlContent">Raw HTML of the page; null or empty yields an empty result.</param>
        /// <returns>
        /// The matched tags keyed by their <c>name</c> attribute. Only the first tag seen
        /// for a given name is kept, and tags without a name share the empty-string key.
        /// </returns>
        private static Dict<string, PageMeta> GetPageMetaInfo(string p_strPageHtmlContent)
        {
            Dict<string, PageMeta> dictReturnSet = new Dict<string, PageMeta>();

            // Regex.Matches throws on a null input; treat missing content as "no meta tags".
            if (string.IsNullOrEmpty(p_strPageHtmlContent))
            {
                return dictReturnSet;
            }

            // Capture groups: 1 = name, 2 = property, 3 = content.
            // The trailing "</head>" requirement is a lookahead so that it is checked but not
            // consumed. When it was part of the match itself, the first match swallowed the
            // rest of the head section and every following meta tag was skipped.
            string pattern = "<meta.+?(?:name=(?:\"|')(.*?)(?:\"|').*?)?(?:property=(?:\"|')(.*?)(?:\"|').*?)?(?:content=(?:\"|')(.*?)(?:\"|'))?/?>(?=.*?</head>)";
            RegexOptions rxoOptions = RegexOptions.IgnoreCase | RegexOptions.Singleline;

            foreach (Match match in Regex.Matches(p_strPageHtmlContent, pattern, rxoOptions))
            {
                string strName = match.Groups[1].Value;

                // First occurrence of a name wins; later duplicates are ignored.
                if (dictReturnSet.ContainsKey(strName))
                {
                    continue;
                }

                PageMeta pmMetaTag = new PageMeta();
                pmMetaTag.Name = strName;
                pmMetaTag.Property = match.Groups[2].Value;
                pmMetaTag.Content = match.Groups[3].Value;
                dictReturnSet.Add(strName, pmMetaTag);
            }

            return dictReturnSet;
        }
Esempio n. 2
0
        /// <summary>
        /// Collects the <c>meta</c> elements of an HTML document into a list of
        /// <c>PageMeta</c> entries.
        /// </summary>
        /// <remarks>
        /// The XPath query used is <c>//meta</c>, so matching elements are taken from
        /// anywhere in the document, not only from the head section.
        /// </remarks>
        /// <param name="p_htmlDocDocument">Parsed HTML document to scan.</param>
        /// <returns>
        /// One entry per matched node, in the order the query returns them;
        /// an empty list when the query yields no nodes.
        /// </returns>
        private static List<PageMeta> GetPageMetaInfo(HtmlDocument p_htmlDocDocument)
        {
            List<PageMeta> lstMeta = new List<PageMeta>();

            // Evaluate the XPath query once and reuse the result for both the
            // null check and the iteration.
            var metaNodes = p_htmlDocDocument.DocumentNode.SelectNodes("//meta");
            if (metaNodes == null)
            {
                return lstMeta;
            }

            foreach (HtmlNode hnItem in metaNodes)
            {
                PageMeta pmMeta = new PageMeta();
                pmMeta.Name = GetHtmlAttributeValue(hnItem.Attributes, "name");
                pmMeta.Property = GetHtmlAttributeValue(hnItem.Attributes, "property");
                pmMeta.Content = GetHtmlAttributeValue(hnItem.Attributes, "content");
                lstMeta.Add(pmMeta);
            }

            return lstMeta;
        }