Example #1
0
        /// <summary>
        /// Copies the registered namespace whose key matches <paramref name="prefix"/> into the graph,
        /// unless the graph already contains a namespace with that prefix.
        /// </summary>
        /// <param name="graph">The graph whose namespaces are updated.</param>
        /// <param name="prefix">The namespace prefix to look up; compared case-insensitively using the invariant culture.</param>
        private static void SetNamespace(OpenGraph graph, string prefix)
        {
            if (graph.Namespaces.Any(n => n.Key.Equals(prefix, StringComparison.InvariantCultureIgnoreCase)))
            {
                return;
            }

            // Scan the registry once, using the same invariant-culture comparison as the check above,
            // so the outcome does not depend on the culture of the calling thread.
            foreach (var ns in NamespaceRegistry.Instance.Namespaces)
            {
                if (ns.Key.Equals(prefix, StringComparison.InvariantCultureIgnoreCase))
                {
                    graph.Namespaces.Add(ns.Key, ns.Value);
                    return;
                }
            }
        }
Example #2
0
        /// <summary>
        /// Fetches the page at <paramref name="url"/> through an <c>HttpDownloader</c> and parses the
        /// returned HTML into an <see cref="OpenGraph"/>.
        /// </summary>
        /// <param name="url">The URL to fetch; also stored as the graph's original URL.</param>
        /// <param name="userAgent">The user agent handed to the downloader.</param>
        /// <param name="validateSpecification">if set to <c>true</c> the parsed graph is validated against the specification.</param>
        /// <returns><see cref="Task{OpenGraph}"/> producing the parsed graph.</returns>
        public static async Task<OpenGraph> ParseUrlAsync(Uri url, string userAgent = "facebookexternalhit", bool validateSpecification = false)
        {
            var graph = new OpenGraph { OriginalUrl = url };

            // Library code: do not resume on the captured synchronization context.
            string page = await new HttpDownloader(url, null, userAgent).GetPageAsync().ConfigureAwait(false);

            graph.OriginalHtml = page;
            return ParseHtml(graph, page, validateSpecification);
        }
Example #3
0
        /// <summary>
        /// Copies the registered namespace whose key equals the lower-cased <paramref name="prefix"/>
        /// into the graph, unless the graph already contains a namespace with that key.
        /// </summary>
        /// <param name="graph">The graph whose namespaces are updated.</param>
        /// <param name="prefix">The namespace prefix; lower-cased (invariant culture) before comparison.</param>
        private static void SetNamespace(OpenGraph graph, string prefix)
        {
            // Lower-case once instead of once per inspected element.
            var key = prefix.ToLowerInvariant();

            if (graph.Namespaces.Any(n => n.Key == key))
            {
                return;
            }

            // Single pass over the registry: the first matching entry is copied into the graph.
            foreach (var ns in NamespaceRegistry.Instance.Namespaces)
            {
                if (ns.Key == key)
                {
                    graph.Namespaces.Add(ns.Key, ns.Value);
                    return;
                }
            }
        }
Example #4
0
        /// <summary>
        /// Reads the first value stored for <paramref name="property"/> and converts it to an absolute <see cref="Uri"/>.
        /// </summary>
        /// <param name="result">The graph whose metadata is searched.</param>
        /// <param name="property">The metadata key, for example <c>og:image</c>.</param>
        /// <returns>The parsed <see cref="Uri"/>, or <c>null</c> when the property is missing, empty, or not a valid absolute URI.</returns>
        private static Uri GetUri(OpenGraph result, string property)
        {
            result.internalOpenGraphData.TryGetValue(property, out var url);

            // A missing key, an empty list, or a null value all collapse to the empty string,
            // which TryCreate rejects without throwing.
            string value = url?.FirstOrDefault()?.Value ?? string.Empty;

            return Uri.TryCreate(value, UriKind.Absolute, out var uri) ? uri : null;
        }
        /// <summary>
        /// Populates the namespaces of the graph from the document. The <c>prefix</c> attribute of
        /// <c>head</c> is used when it declares at least one namespace; otherwise the <c>xmlns:</c>
        /// attributes of <c>html</c> are used; otherwise only the registered <c>og</c> namespace is added.
        /// </summary>
        /// <param name="result">The graph that receives the namespaces.</param>
        /// <param name="document">The parsed HTML document.</param>
        private static void ParseNamespaces(OpenGraph result, HtmlDocument document)
        {
            const string NamespacePattern = @"(\w+):\s?(https?://[^\s]+)";
            const RegexOptions NamespaceOptions = RegexOptions.IgnoreCase | RegexOptions.CultureInvariant | RegexOptions.Singleline;

            HtmlNode head = document.DocumentNode.SelectSingleNode("//head");
            HtmlNode html = document.DocumentNode.SelectSingleNode("html");

            // Run the regex once, with the same options for detection and extraction, so that a
            // declaration with an upper-case scheme ("HTTP://...") is handled consistently.
            MatchCollection matches = null;
            if (head != null && head.Attributes.Contains("prefix"))
            {
                matches = Regex.Matches(head.Attributes["prefix"].Value, NamespacePattern, NamespaceOptions);
            }

            if (matches != null && matches.Count > 0)
            {
                foreach (Match match in matches)
                {
                    var prefix = match.Groups[1].Value;
                    if (result.Namespaces.ContainsKey(prefix))
                    {
                        // The same prefix declared twice: keep the first declaration rather than
                        // passing a duplicate key to Add.
                        continue;
                    }

                    // Prefer the registered definition of a known prefix over the declared URL.
                    if (NamespaceRegistry.Instance.Namespaces.ContainsKey(prefix))
                    {
                        result.Namespaces.Add(prefix, NamespaceRegistry.Instance.Namespaces[prefix]);
                        continue;
                    }

                    result.Namespaces.Add(prefix, new OpenGraphNamespace(prefix, match.Groups[2].Value));
                }
            }
            else if (html != null && html.Attributes.Any(a => a.Name.StartsWith("xmlns:", StringComparison.InvariantCultureIgnoreCase)))
            {
                var namespaces = html.Attributes.Where(a => a.Name.StartsWith("xmlns:", StringComparison.InvariantCultureIgnoreCase));
                foreach (var ns in namespaces)
                {
#if NETSTANDARD2_1
                    var prefix = ns.Name.ToLowerInvariant().Replace("xmlns:", string.Empty, StringComparison.InvariantCultureIgnoreCase);
#else
                    var prefix = ns.Name.ToLowerInvariant().Replace("xmlns:", string.Empty);
#endif
                    if (!result.Namespaces.ContainsKey(prefix))
                    {
                        result.Namespaces.Add(prefix, new OpenGraphNamespace(prefix, ns.Value));
                    }
                }
            }
            else
            {
                // append the minimum og: prefix and namespace
                result.Namespaces.Add("og", NamespaceRegistry.Instance.Namespaces["og"]);
            }
        }
Example #6
0
        /// <summary>
        /// Checks that every required element of each registered namespace used by the graph is present in its metadata.
        /// </summary>
        /// <param name="result">The graph to validate.</param>
        /// <exception cref="InvalidSpecificationException">The parsed HTML does not meet the open graph specification, missing element: {required}.</exception>
        private static void ValidateSpecification(OpenGraph result)
        {
            // Materialize the prefixes once; the loop below probes this set for every registry entry,
            // which would otherwise re-run the deferred query each time.
            var prefixes = new HashSet<string>(result.Namespaces.Select(ns => ns.Value.Prefix));

            foreach (var entry in NamespaceRegistry.Instance.Namespaces)
            {
                var ns = entry.Value;

                // Only registered namespaces that the graph uses and that declare required elements matter.
                if (!prefixes.Contains(entry.Key) || ns.RequiredElements.Count <= 0)
                {
                    continue;
                }

                foreach (var required in ns.RequiredElements)
                {
                    if (!result.Metadata.ContainsKey(string.Concat(ns.Prefix, ":", required)))
                    {
                        throw new InvalidSpecificationException($"The parsed HTML does not meet the open graph specification, missing element: {required}");
                    }
                }
            }
        }
Example #7
0
        /// <summary>
        /// Parses <paramref name="content"/> and fills <paramref name="result"/> with the namespaces,
        /// metadata, type, title, image and URL found in its <c>meta</c> tags.
        /// </summary>
        /// <param name="result">The graph to populate; the same instance is returned.</param>
        /// <param name="content">The HTML content.</param>
        /// <param name="validateSpecification">if set to <c>true</c> the populated graph is validated before it is returned.</param>
        /// <returns>The populated <see cref="OpenGraph"/>.</returns>
        /// <exception cref="OpenGraphNet.InvalidSpecificationException">The parsed HTML does not meet the open graph specification.</exception>
        private static OpenGraph ParseHtml(OpenGraph result, string content, bool validateSpecification = false)
        {
            HtmlDocument document = MakeDocumentToParse(content);
            ParseNamespaces(result, document);

            // True when the node carries the given attribute and its value passes the namespace predicate.
            bool HasMatchingAttribute(HtmlNode node, string attributeName)
            {
                return node.Attributes.Contains(attributeName) && MatchesNamespacePredicate(node.Attributes[attributeName].Value);
            }

            HtmlNodeCollection metaNodes = document.DocumentNode.SelectNodes("//meta");

            // Left lazy on purpose: the filter is evaluated while the loop below runs.
            var openGraphTags = (metaNodes ?? new HtmlNodeCollection(null))
                                .Where(meta => HasMatchingAttribute(meta, "property") || HasMatchingAttribute(meta, "name"));

            StructuredMetadata current = null;

            foreach (HtmlNode tag in openGraphTags)
            {
                var prefix = GetOpenGraphPrefix(tag);
                SetNamespace(result, prefix);
                if (!result.Namespaces.ContainsKey(prefix))
                {
                    // Prefix is still not a namespace of the graph: skip the tag.
                    continue;
                }

                string rawValue      = GetOpenGraphValue(tag);
                string property      = GetOpenGraphKey(tag);
                var    cleanProperty = CleanOpenGraphKey(prefix, property);
                string value         = HtmlDecodeUrl(property, rawValue);

                // Case 1: the tag is a property of the most recently created element.
                if (current != null && current.IsMyProperty(property))
                {
                    current.AddProperty(cleanProperty, value);
                    continue;
                }

                // Case 2: the tag starts a new top-level element.
                if (!IsChildOfExistingElement(result.internalOpenGraphData, property))
                {
                    current = new StructuredMetadata(result.Namespaces[prefix], cleanProperty, value);
                    result.AddMetadata(current);
                    continue;
                }

                // Case 3: the tag belongs to an earlier element; attach it to the first
                // instance that does not have this property yet.
                var owner  = result.internalOpenGraphData.First(kvp => kvp.Value.First().IsMyProperty(property));
                var target = owner.Value.FirstOrDefault(e => !e.Properties.ContainsKey(cleanProperty));
                target?.AddProperty(cleanProperty, value);
            }

            result.Type = string.Empty;
            if (result.internalOpenGraphData.TryGetValue("og:type", out var typeEntries))
            {
                var first = typeEntries.FirstOrDefault() ?? new NullMetadata();
                result.Type = first.Value ?? string.Empty;
            }

            result.Title = string.Empty;
            if (result.internalOpenGraphData.TryGetValue("og:title", out var titleEntries))
            {
                var first = titleEntries.FirstOrDefault() ?? new NullMetadata();
                result.Title = first.Value ?? string.Empty;
            }

            result.Image = GetUri(result, "og:image");
            result.Url   = GetUri(result, "og:url");

            if (validateSpecification)
            {
                ValidateSpecification(result);
            }

            return result;
        }
Example #8
0
        /// <summary>
        /// Parses HTML for open graph content into a newly created <see cref="OpenGraph"/>.
        /// </summary>
        /// <param name="content">The HTML to parse.</param>
        /// <param name="validateSpecification">if set to <c>true</c> verify that the document meets the required attributes of the open graph specification.</param>
        /// <returns>The parsed <see cref="OpenGraph"/>.</returns>
        public static OpenGraph ParseHtml(string content, bool validateSpecification = false)
            => ParseHtml(new OpenGraph(), content, validateSpecification);
Example #9
0
        /// <summary>
        /// Builds an <see cref="OpenGraph"/> in the registered <c>og</c> namespace from the supplied values.
        /// Title, type, image and URL are always added; the remaining values are added only when they
        /// are not null, empty or whitespace.
        /// </summary>
        /// <param name="title">The title.</param>
        /// <param name="type">The type.</param>
        /// <param name="image">The image; must be an absolute URI.</param>
        /// <param name="url">The URL; must be an absolute URI.</param>
        /// <param name="description">The description.</param>
        /// <param name="siteName">Name of the site.</param>
        /// <param name="audio">The audio.</param>
        /// <param name="video">The video.</param>
        /// <param name="locale">The locale.</param>
        /// <param name="localeAlternates">The locale alternates; <c>null</c> is treated as empty.</param>
        /// <param name="determiner">The determiner.</param>
        /// <returns>The constructed <see cref="OpenGraph"/>.</returns>
        public static OpenGraph MakeGraph(
            string title,
            string type,
            string image,
            string url,
            string description = "",
            string siteName = "",
            string audio = "",
            string video = "",
            string locale = "",
            IList<string> localeAlternates = null,
            string determiner = "")
        {
            var graph = new OpenGraph
            {
                Title = title,
                Type  = type,
                Image = new Uri(image, UriKind.Absolute),
                Url   = new Uri(url, UriKind.Absolute),
            };
            var ns = NamespaceRegistry.Instance.Namespaces["og"];

            // Adds an element only when a non-blank value was supplied.
            void AddOptional(string name, string value)
            {
                if (!string.IsNullOrWhiteSpace(value))
                {
                    graph.AddMetadata(new StructuredMetadata(ns, name, value));
                }
            }

            graph.Namespaces.Add(ns.Prefix, ns);

            // Required elements.
            graph.AddMetadata(new StructuredMetadata(ns, "title", title));
            graph.AddMetadata(new StructuredMetadata(ns, "type", type));
            graph.AddMetadata(new StructuredMetadata(ns, "image", image));
            graph.AddMetadata(new StructuredMetadata(ns, "url", url));

            // Optional elements, in the same order as before.
            AddOptional("description", description);
            AddOptional("site_name", siteName);
            AddOptional("audio", audio);
            AddOptional("video", video);
            AddOptional("locale", locale);
            AddOptional("determiner", determiner);

            var alternates = localeAlternates ?? new List<string>();

            if (graph.internalOpenGraphData.TryGetValue("og:locale", out var localeEntries))
            {
                // A locale element exists: alternates become properties of it.
                var localeElement = localeEntries.First();
                foreach (var alternate in alternates)
                {
                    localeElement.AddProperty(new PropertyMetadata("alternate", alternate));
                }
            }
            else
            {
                // No locale element: each alternate is added as its own element.
                foreach (var alternate in alternates)
                {
                    graph.AddMetadata(new StructuredMetadata(ns, "locale:alternate", alternate));
                }
            }

            return graph;
        }