/// <summary>
/// Looks up the page title by passing the absolute XPath of the
/// <c>&lt;title&gt;</c> element to <c>document.GetText</c>.
/// </summary>
/// <param name="document">Parsed HTML document to query.</param>
/// <returns>Whatever <c>GetText</c> yields for <c>/html/head/title</c>.</returns>
public string GetTitle(HtmlDocument document)
    => document.GetText("/html/head/title");
 /// <summary>
 /// Looks up the page description by passing the XPath of the head-level
 /// <c>meta</c> element whose <c>name</c> attribute is exactly <c>Description</c>
 /// to <c>document.GetText</c>.
 /// </summary>
 /// <param name="document">Parsed HTML document to query.</param>
 /// <returns>Whatever <c>GetText</c> yields for that <c>meta</c> element.</returns>
 public string GetDescription(HtmlDocument document)
     => document.GetText("/html/head/meta[@name='Description']");
 /// <summary>
 /// Looks up the page description from the body: queries <c>p</c> elements that are
 /// direct children of <c>div#mw-content-text</c> inside <c>div#bodyContent</c>
 /// and hands that XPath to <c>document.GetText</c>.
 /// </summary>
 /// <param name="document">Parsed HTML document to query.</param>
 /// <returns>Whatever <c>GetText</c> yields for the paragraph XPath.</returns>
 public string GetDescription(HtmlDocument document)
     => document.GetText("/html/body//*/div[@id='bodyContent']/div[@id='mw-content-text']/p");
Example #4
0
        /// <summary>
        /// Fetches content for <paramref name="rqurl"/> via <c>FetchMetadata</c>, parses it
        /// as HTML and extracts the page title.
        /// </summary>
        /// <remarks>
        /// The title starts as the HTML-decoded inner text of the first <c>&lt;title&gt;</c>
        /// element (or <c>null</c> when there is none). It is then overwritten by every
        /// non-blank value that <c>html.GetText</c> returns for the <c>og:title</c> and
        /// <c>twitter:title</c> meta XPaths, in that order, so the last non-blank match wins.
        /// Only <c>Title</c> is populated by this method.
        /// </remarks>
        /// <param name="rqurl">Value forwarded to <c>FetchMetadata</c> (presumably the page URL).</param>
        /// <returns>
        /// A <c>Metadata</c> instance with <c>Title</c> set, or <c>null</c> when the fetched
        /// content is null, empty or whitespace.
        /// </returns>
        public static Metadata FetchAndParseMetadata(string rqurl)
        {
            // NOTE(review): .Result blocks the calling thread until the fetch completes and
            // surfaces failures wrapped in AggregateException. Kept as-is so the synchronous
            // signature and exception behaviour stay unchanged; consider an async overload.
            var content = FetchMetadata(rqurl).Result;

            if (string.IsNullOrWhiteSpace(content))
            {
                return null;
            }

            var html = new HtmlDocument();
            html.LoadHtml(content);

            var data = new Metadata
            {
                Title = HttpUtility.HtmlDecode(html.DocumentNode.SelectSingleNode("//title")?.InnerText)
            };

            // Ordered from lowest to highest precedence: a later non-blank hit replaces an earlier one.
            var titleTags = new[]
            {
                "//meta[@property='og:title']",
                "//meta[@property='twitter:title']"
            };

            foreach (var xpath in titleTags)
            {
                var title = html.GetText(xpath);
                if (!string.IsNullOrWhiteSpace(title))
                {
                    data.Title = HttpUtility.HtmlDecode(title);
                }
            }

            return data;
        }