/// <summary>
/// Ensures the graph declares the namespace for <paramref name="prefix"/>. When the graph
/// does not have it yet, the matching entry is copied from <see cref="NamespaceRegistry"/>;
/// when the registry has no such entry either, the graph is left unchanged.
/// </summary>
/// <param name="graph">The graph.</param>
/// <param name="prefix">The prefix.</param>
private static void SetNamespace(OpenGraph graph, string prefix)
{
    // A single comparison is used for every lookup. Mixing a current-culture existence check
    // with an invariant-culture fetch lets the two disagree (e.g. under a Turkish culture),
    // which either skips a registered namespace or makes the fetch throw.
    const StringComparison PrefixComparison = StringComparison.InvariantCultureIgnoreCase;

    if (graph.Namespaces.Any(n => n.Key.Equals(prefix, PrefixComparison)))
    {
        return;
    }

    // One pass over the registry: the first matching entry is copied into the graph.
    foreach (var registered in NamespaceRegistry.Instance.Namespaces)
    {
        if (registered.Key.Equals(prefix, PrefixComparison))
        {
            graph.Namespaces.Add(registered.Key, registered.Value);
            return;
        }
    }
}
/// <summary>
/// Downloads the page at the given URL and parses its open graph content.
/// </summary>
/// <param name="url">The URL to download.</param>
/// <param name="userAgent">The user agent handed to the downloader.</param>
/// <param name="validateSpecification">if set to <c>true</c> the parsed graph is validated against the specification.</param>
/// <returns><see cref="Task{OpenGraph}"/>.</returns>
public static async Task<OpenGraph> ParseUrlAsync(Uri url, string userAgent = "facebookexternalhit", bool validateSpecification = false)
{
    var graph = new OpenGraph
    {
        OriginalUrl = url,
    };

    // The awaited download does not resume on the captured context.
    string page = await new HttpDownloader(url, null, userAgent)
        .GetPageAsync()
        .ConfigureAwait(false);

    graph.OriginalHtml = page;
    return ParseHtml(graph, page, validateSpecification);
}
/// <summary>
/// Ensures the graph declares the namespace whose key equals the lower-cased
/// <paramref name="prefix"/>. When the graph does not have it yet, the matching entry is
/// copied from <see cref="NamespaceRegistry"/>; when the registry has no such entry either,
/// the graph is left unchanged.
/// </summary>
/// <param name="graph">The graph.</param>
/// <param name="prefix">The prefix; must not be <c>null</c>.</param>
private static void SetNamespace(OpenGraph graph, string prefix)
{
    // Lower-case once instead of once per inspected element.
    var key = prefix.ToLowerInvariant();

    if (graph.Namespaces.Any(n => n.Key == key))
    {
        return;
    }

    // One pass over the registry: the first matching entry is copied into the graph.
    foreach (var registered in NamespaceRegistry.Instance.Namespaces)
    {
        if (registered.Key == key)
        {
            graph.Namespaces.Add(registered.Key, registered.Value);
            return;
        }
    }
}
/// <summary>
/// Reads the first value stored for <paramref name="property"/> and converts it to an
/// absolute <see cref="Uri"/>.
/// </summary>
/// <param name="result">The graph whose parsed data is inspected.</param>
/// <param name="property">The property key to look up, e.g. <c>og:image</c>.</param>
/// <returns>
/// The absolute Uri, or <c>null</c> when the property is absent, has no value,
/// or its value is not a valid absolute URI.
/// </returns>
private static Uri GetUri(OpenGraph result, string property)
{
    // The lookup result is deliberately ignored: a miss leaves the out value null,
    // which the null-conditional chain below turns into a null candidate.
    result.internalOpenGraphData.TryGetValue(property, out var url);
    var candidate = url?.FirstOrDefault()?.Value;

    // TryCreate reports null, empty and malformed input by returning false,
    // so no exception has to be thrown and caught for the expected "no URI" case.
    return Uri.TryCreate(candidate, UriKind.Absolute, out var uri) ? uri : null;
}
/// <summary>
/// Collects the namespace declarations of the document into <paramref name="result"/>.
/// </summary>
/// <remarks>
/// Sources are tried in this order: the <c>prefix</c> attribute of the <c>head</c> element,
/// then the <c>xmlns:*</c> attributes of the <c>html</c> element. When neither yields a
/// declaration, the registered <c>og</c> namespace is added. When the same prefix is declared
/// more than once, the first declaration is kept.
/// </remarks>
/// <param name="result">The result.</param>
/// <param name="document">The document.</param>
private static void ParseNamespaces(OpenGraph result, HtmlDocument document)
{
    const string NamespacePattern = @"(\w+):\s?(https?://[^\s]+)";
    const RegexOptions NamespaceOptions = RegexOptions.IgnoreCase | RegexOptions.CultureInvariant | RegexOptions.Singleline;
    const string XmlnsPrefix = "xmlns:";

    HtmlNode head = document.DocumentNode.SelectSingleNode("//head");
    HtmlNode html = document.DocumentNode.SelectSingleNode("html");

    // Match once, with the same options for the "is there anything" test and the extraction,
    // so a declaration is never rejected by one and accepted by the other.
    MatchCollection matches = head != null && head.Attributes.Contains("prefix")
        ? Regex.Matches(head.Attributes["prefix"].Value, NamespacePattern, NamespaceOptions)
        : null;

    if (matches != null && matches.Count > 0)
    {
        foreach (Match match in matches)
        {
            var prefix = match.Groups[1].Value;
            if (result.Namespaces.ContainsKey(prefix))
            {
                // Repeated prefix: keep the first declaration instead of letting Add throw.
                continue;
            }

            // A registered namespace takes precedence over the URI written in the document.
            if (NamespaceRegistry.Instance.Namespaces.TryGetValue(prefix, out var registered))
            {
                result.Namespaces.Add(prefix, registered);
            }
            else
            {
                result.Namespaces.Add(prefix, new OpenGraphNamespace(prefix, match.Groups[2].Value));
            }
        }

        return;
    }

    var xmlnsAttributes = html?.Attributes
        .Where(a => a.Name.StartsWith(XmlnsPrefix, StringComparison.InvariantCultureIgnoreCase))
        .ToList();

    if (xmlnsAttributes != null && xmlnsAttributes.Count > 0)
    {
        foreach (var attribute in xmlnsAttributes)
        {
#if NETSTANDARD2_1
            var prefix = attribute.Name.ToLowerInvariant().Replace(XmlnsPrefix, string.Empty, StringComparison.InvariantCultureIgnoreCase);
#else
            var prefix = attribute.Name.ToLowerInvariant().Replace(XmlnsPrefix, string.Empty);
#endif

            // Attribute names that differ only by case collapse to one prefix; keep the first.
            if (!result.Namespaces.ContainsKey(prefix))
            {
                result.Namespaces.Add(prefix, new OpenGraphNamespace(prefix, attribute.Value));
            }
        }

        return;
    }

    // append the minimum og: prefix and namespace
    result.Namespaces.Add("og", NamespaceRegistry.Instance.Namespaces["og"]);
}
/// <summary>
/// Checks that every required element of each registered namespace used by the graph
/// is present in the graph's metadata.
/// </summary>
/// <param name="result">The graph to validate.</param>
/// <exception cref="InvalidSpecificationException">The parsed HTML does not meet the open graph specification, missing element: {required}.</exception>
private static void ValidateSpecification(OpenGraph result)
{
    var declaredPrefixes = result.Namespaces.Select(entry => entry.Value.Prefix);

    // Only registered namespaces that the graph declares and that demand elements matter.
    var namespacesToCheck =
        (from registered in NamespaceRegistry.Instance.Namespaces
         where declaredPrefixes.Contains(registered.Key) && registered.Value.RequiredElements.Count > 0
         select registered.Value).ToList();

    foreach (var candidate in namespacesToCheck)
    {
        foreach (var required in candidate.RequiredElements)
        {
            var metadataKey = string.Concat(candidate.Prefix, ":", required);
            if (result.Metadata.ContainsKey(metadataKey))
            {
                continue;
            }

            throw new InvalidSpecificationException($"The parsed HTML does not meet the open graph specification, missing element: {required}");
        }
    }
}
/// <summary>
/// Parses the HTML into the supplied graph: namespaces first, then every matching
/// <c>meta</c> tag, and finally the Type, Title, Image and Url shortcuts.
/// </summary>
/// <param name="result">The graph to populate.</param>
/// <param name="content">The HTML content.</param>
/// <param name="validateSpecification">if set to <c>true</c> the populated graph is validated before it is returned.</param>
/// <returns><see cref="OpenGraph"/>.</returns>
/// <exception cref="OpenGraphNet.InvalidSpecificationException">The parsed HTML does not meet the open graph specification.</exception>
private static OpenGraph ParseHtml(OpenGraph result, string content, bool validateSpecification = false)
{
    // First value stored under the key, or an empty string when there is none.
    string FirstValueOrEmpty(string key)
    {
        if (!result.internalOpenGraphData.TryGetValue(key, out var entries))
        {
            return string.Empty;
        }

        return (entries.FirstOrDefault() ?? new NullMetadata()).Value ?? string.Empty;
    }

    HtmlDocument document = MakeDocumentToParse(content);
    ParseNamespaces(result, document);

    HtmlNodeCollection metaNodes = document.DocumentNode.SelectNodes("//meta") ?? new HtmlNodeCollection(null);

    // Lazily filtered: a tag qualifies through its "property" or its "name" attribute.
    var candidates = metaNodes.Where(node =>
        (node.Attributes.Contains("property") && MatchesNamespacePredicate(node.Attributes["property"].Value))
        || (node.Attributes.Contains("name") && MatchesNamespacePredicate(node.Attributes["name"].Value)));

    StructuredMetadata current = null;
    foreach (HtmlNode node in candidates)
    {
        var prefix = GetOpenGraphPrefix(node);
        SetNamespace(result, prefix);
        if (!result.Namespaces.ContainsKey(prefix))
        {
            // Namespace neither declared nor registered: the tag is skipped.
            continue;
        }

        string rawValue = GetOpenGraphValue(node);
        string key = GetOpenGraphKey(node);
        var cleanKey = CleanOpenGraphKey(prefix, key);
        string decoded = HtmlDecodeUrl(key, rawValue);

        // Case 1: the tag is a property of the most recently created element.
        if (current != null && current.IsMyProperty(key))
        {
            current.AddProperty(cleanKey, decoded);
            continue;
        }

        // Case 2: the tag belongs to an earlier element; attach it to the first
        // sibling that does not carry this property yet (if any).
        if (IsChildOfExistingElement(result.internalOpenGraphData, key))
        {
            result.internalOpenGraphData
                .First(entry => entry.Value.First().IsMyProperty(key))
                .Value
                .FirstOrDefault(sibling => !sibling.Properties.ContainsKey(cleanKey))
                ?.AddProperty(cleanKey, decoded);
            continue;
        }

        // Case 3: the tag starts a new element.
        current = new StructuredMetadata(result.Namespaces[prefix], cleanKey, decoded);
        result.AddMetadata(current);
    }

    result.Type = FirstValueOrEmpty("og:type");
    result.Title = FirstValueOrEmpty("og:title");
    result.Image = GetUri(result, "og:image");
    result.Url = GetUri(result, "og:url");

    if (validateSpecification)
    {
        ValidateSpecification(result);
    }

    return result;
}
/// <summary>
/// Parses HTML text for open graph content, starting from an empty graph.
/// </summary>
/// <param name="content">The HTML to parse.</param>
/// <param name="validateSpecification">if set to <c>true</c> verify that the document meets the required attributes of the open graph specification.</param>
/// <returns><see cref="OpenGraph"/>.</returns>
public static OpenGraph ParseHtml(string content, bool validateSpecification = false)
    => ParseHtml(new OpenGraph(), content, validateSpecification);
/// <summary>
/// Builds a graph in the <c>og</c> namespace from individual values.
/// </summary>
/// <remarks>
/// Title, type, image and url are always added. The optional values are added only when they
/// are not null, empty or white space. Locale alternates become <c>alternate</c> properties of
/// the locale element when a locale was added, and stand-alone <c>locale:alternate</c>
/// elements otherwise.
/// </remarks>
/// <param name="title">The title.</param>
/// <param name="type">The type.</param>
/// <param name="image">The image; parsed as an absolute URI.</param>
/// <param name="url">The URL; parsed as an absolute URI.</param>
/// <param name="description">The description.</param>
/// <param name="siteName">Name of the site.</param>
/// <param name="audio">The audio.</param>
/// <param name="video">The video.</param>
/// <param name="locale">The locale.</param>
/// <param name="localeAlternates">The locale alternates; <c>null</c> is treated as none.</param>
/// <param name="determiner">The determiner.</param>
/// <returns><see cref="OpenGraph"/>.</returns>
public static OpenGraph MakeGraph(
    string title,
    string type,
    string image,
    string url,
    string description = "",
    string siteName = "",
    string audio = "",
    string video = "",
    string locale = "",
    IList<string> localeAlternates = null,
    string determiner = "")
{
    var graph = new OpenGraph
    {
        Title = title,
        Type = type,
        Image = new Uri(image, UriKind.Absolute),
        Url = new Uri(url, UriKind.Absolute),
    };

    var og = NamespaceRegistry.Instance.Namespaces["og"];
    graph.Namespaces.Add(og.Prefix, og);

    // Adds one element in the og namespace.
    void Add(string name, string value) => graph.AddMetadata(new StructuredMetadata(og, name, value));

    // Adds the element only when a meaningful value was supplied.
    void AddIfPresent(string name, string value)
    {
        if (!string.IsNullOrWhiteSpace(value))
        {
            Add(name, value);
        }
    }

    Add("title", title);
    Add("type", type);
    Add("image", image);
    Add("url", url);

    AddIfPresent("description", description);
    AddIfPresent("site_name", siteName);
    AddIfPresent("audio", audio);
    AddIfPresent("video", video);
    AddIfPresent("locale", locale);
    AddIfPresent("determiner", determiner);

    if (graph.internalOpenGraphData.TryGetValue("og:locale", out var localeEntries))
    {
        var localeElement = localeEntries.First();
        foreach (var alternate in localeAlternates ?? new List<string>())
        {
            localeElement.AddProperty(new PropertyMetadata("alternate", alternate));
        }
    }
    else
    {
        foreach (var alternate in localeAlternates ?? new List<string>())
        {
            Add("locale:alternate", alternate);
        }
    }

    return graph;
}