/// <summary>
/// Looks up the document's title element and returns whatever the
/// <c>GetText</c> helper yields for it.
/// </summary>
/// <param name="document">Parsed HTML document to query.</param>
/// <returns>The result of <c>GetText</c> for the XPath <c>/html/head/title</c>.</returns>
public string GetTitle(HtmlDocument document) =>
    document.GetText("/html/head/title");
/// <summary>
/// Looks up the <c>Description</c> meta element in the document head and
/// returns whatever the <c>GetText</c> helper yields for it.
/// </summary>
/// <param name="document">Parsed HTML document to query.</param>
/// <returns>
/// The result of <c>GetText</c> for the XPath
/// <c>/html/head/meta[@name='Description']</c>.
/// </returns>
public string GetDescription(HtmlDocument document) =>
    document.GetText("/html/head/meta[@name='Description']");
/// <summary>
/// Looks up the paragraph(s) under the <c>mw-content-text</c> div inside the
/// <c>bodyContent</c> div and returns whatever the <c>GetText</c> helper
/// yields for that XPath.
/// </summary>
/// <param name="document">Parsed HTML document to query.</param>
/// <returns>The result of <c>GetText</c> for the body-content paragraph XPath.</returns>
// NOTE(review): this signature is identical to the meta-tag based GetDescription;
// if both live in the same type the compiler rejects the duplicate - confirm
// which variant is intended for which class.
public string GetDescription(HtmlDocument document) =>
    document.GetText("/html/body//*/div[@id='bodyContent']/div[@id='mw-content-text']/p");
/// <summary>
/// Obtains content for <paramref name="rqurl"/> via <c>FetchMetadata</c>, parses it
/// as HTML and builds a <see cref="Metadata"/> from it.
/// </summary>
/// <remarks>
/// Only <c>Title</c> is populated. Description, image and URL extraction is not
/// performed by this method.
/// </remarks>
/// <param name="rqurl">Value handed to <c>FetchMetadata</c>.</param>
/// <returns>
/// A <see cref="Metadata"/> with its <c>Title</c> set, or <c>null</c> when the
/// fetched content is null, empty or whitespace.
/// </returns>
public static Metadata FetchAndParseMetadata(string rqurl)
{
    // NOTE(review): .Result blocks the calling thread until FetchMetadata completes
    // and surfaces failures wrapped in AggregateException. It is kept because this
    // method's signature is synchronous; consider offering an async variant.
    var content = FetchMetadata(rqurl).Result;
    if (string.IsNullOrWhiteSpace(content))
    {
        return null;
    }

    var html = new HtmlDocument();
    html.LoadHtml(content);

    // Start from the plain <title> element; SelectSingleNode may find nothing,
    // in which case the null-conditional passes null through to HtmlDecode.
    var data = new Metadata
    {
        Title = HttpUtility.HtmlDecode(html.DocumentNode.SelectSingleNode("//title")?.InnerText)
    };

    // Each non-blank hit overwrites the previous title, so the last matching
    // entry in this list takes precedence.
    var titleTags = new[]
    {
        "//meta[@property='og:title']",
        "//meta[@property='twitter:title']"
    };

    foreach (var xpath in titleTags)
    {
        var title = html.GetText(xpath);
        if (!string.IsNullOrWhiteSpace(title))
        {
            data.Title = HttpUtility.HtmlDecode(title);
        }
    }

    return data;
}