Example #1
0
        /// <summary>
        /// Collects the Open Graph <c>meta</c> tags found in an HTML document and stores them on <paramref name="result"/>.
        /// </summary>
        /// <remarks>
        /// Only the markup up to and including the closing <c>&lt;/head&gt;</c> tag is parsed; when no such tag is
        /// present, the whole of <paramref name="content"/> is parsed instead. A <c>meta</c> element is considered
        /// when its <c>property</c> or <c>name</c> attribute starts with <c>og:</c>. Tags with a blank value are
        /// skipped, and the first occurrence of a key wins over later duplicates.
        /// </remarks>
        /// <param name="result">The instance to populate; the same instance is returned.</param>
        /// <param name="content">The HTML markup to scan.</param>
        /// <param name="validateSpecification">if set to <c>true</c>, every entry of <c>RequiredMeta</c> must be present after parsing.</param>
        /// <returns>The populated <see cref="OpenGraph"/> (the instance passed as <paramref name="result"/>).</returns>
        /// <exception cref="ArgumentNullException"><paramref name="content"/> is <c>null</c>.</exception>
        /// <exception cref="InvalidSpecificationException">The parsed HTML does not meet the open graph specification</exception>
        private static OpenGraph ParseHtml(OpenGraph result, string content, bool validateSpecification = false)
        {
            if (content == null)
            {
                throw new ArgumentNullException("content");
            }

            const string closingHead = "</head>";

            // Cut the document after </head> and append an empty body so the parser sees a
            // well-formed page. The search is case-insensitive because HTML tag names are.
            // If there is no closing head tag, parse the content as-is rather than slicing
            // an arbitrary 7-character prefix (or throwing on very short input).
            int    indexOfClosingHead = content.IndexOf(closingHead, StringComparison.OrdinalIgnoreCase);
            string toParse            = indexOfClosingHead >= 0
                                            ? content.Substring(0, indexOfClosingHead + closingHead.Length) + "<body></body></html>\r\n"
                                            : content;

            HtmlDocument document = new HtmlDocument();

            document.LoadHtml(toParse);

            // SelectNodes may yield null when nothing matches, hence the empty-collection fallback below.
            HtmlNodeCollection allMeta = document.DocumentNode.SelectNodes("//meta");

            // Keys matching one of these patterns hold URLs and get HTML-decoded.
            // "url$" matches keys ending in "url"; the previous "url^" could never match
            // because '^' anchors the start of the input.
            var urlPropertyPatterns    = new[] { "image", "url$" };
            var openGraphMetaTags      = from meta in allMeta ?? new HtmlNodeCollection(null)
                                         where (meta.Attributes.Contains("property") && meta.Attributes["property"].Value.StartsWith("og:", StringComparison.Ordinal)) ||
                                         (meta.Attributes.Contains("name") && meta.Attributes["name"].Value.StartsWith("og:", StringComparison.Ordinal))
                                         select meta;

            foreach (HtmlNode metaTag in openGraphMetaTags)
            {
                string value    = GetOpenGraphValue(metaTag);
                string property = GetOpenGraphKey(metaTag);

                // Ignore tags without a usable value.
                if (string.IsNullOrWhiteSpace(value))
                {
                    continue;
                }

                // First occurrence of a key wins.
                if (result._openGraphData.ContainsKey(property))
                {
                    continue;
                }

                foreach (var urlPropertyPattern in urlPropertyPatterns)
                {
                    if (Regex.IsMatch(property, urlPropertyPattern))
                    {
                        value = HtmlDecodeUrl(value);
                        break;
                    }
                }

                result._openGraphData.Add(property, value);
            }

            string type;

            result._openGraphData.TryGetValue("type", out type);
            result.Type = type ?? string.Empty;

            string title;

            result._openGraphData.TryGetValue("title", out title);
            result.Title = title ?? string.Empty;

            // Image and Url are only assigned when the stored value is a valid absolute URI;
            // a missing or malformed value leaves the property untouched.
            string image;
            Uri    imageUri;

            result._openGraphData.TryGetValue("image", out image);
            if (Uri.TryCreate(image, UriKind.Absolute, out imageUri))
            {
                result.Image = imageUri;
            }

            string url;
            Uri    pageUri;

            result._openGraphData.TryGetValue("url", out url);
            if (Uri.TryCreate(url, UriKind.Absolute, out pageUri))
            {
                result.Url = pageUri;
            }

            if (validateSpecification)
            {
                foreach (string required in RequiredMeta)
                {
                    if (!result.ContainsKey(required))
                    {
                        throw new InvalidSpecificationException("The parsed HTML does not meet the open graph specification");
                    }
                }
            }

            return result;
        }