/// <summary>
/// Updates the href for simple redirects: every anchor whose href equals
/// <paramref name="linkHref"/> has its href replaced with <paramref name="newHref"/>.
/// </summary>
/// <param name="htmlSource">The HTML source.</param>
/// <param name="linkHref">The link href to look for. Compared ordinally; null is treated as an empty string.</param>
/// <param name="newHref">The new href.</param>
/// <returns>The result of <c>TextCleaner.GetHtmlSource</c> for the updated document.</returns>
public static string UpdateHref(string htmlSource, string linkHref, string newHref)
{
    var doc = TextCleaner.CreateHtmlDocument(htmlSource);

    // Concatenating null into the old XPath produced an empty literal, so keep that meaning.
    var targetHref = linkHref ?? string.Empty;

    // Compare the href in code instead of embedding it in the XPath expression:
    // XPath 1.0 string literals cannot escape quotes, so an href containing an
    // apostrophe would yield an invalid (or altered) query.
    var anchors = doc.DocumentNode.SelectNodes("//a[@href]") ?? Enumerable.Empty<HtmlNode>();
    var matches = anchors
        .Where(a => string.Equals(a.GetAttributeValue("href", string.Empty), targetHref, StringComparison.Ordinal))
        .ToList();

    foreach (var node in matches)
    {
        node.Attributes["href"].Value = newHref;
    }

    return TextCleaner.GetHtmlSource(doc);
}
/// <summary>
/// Marks the specified invalid link with square brackets: the inner HTML of every
/// anchor whose href equals <paramref name="linkHref"/> is wrapped in a single
/// <c>[</c> ... <c>]</c> pair.
/// </summary>
/// <param name="htmlSource">The HTML source to update.</param>
/// <param name="linkHref">The link href to alter. Compared ordinally; null is treated as an empty string.</param>
/// <returns>The result of <c>TextCleaner.GetHtmlSource</c> for the updated document.</returns>
public static string MarkInvalid(string htmlSource, string linkHref)
{
    var doc = TextCleaner.CreateHtmlDocument(htmlSource);

    // Concatenating null into the old XPath produced an empty literal, so keep that meaning.
    var targetHref = linkHref ?? string.Empty;

    // Compare the href in code instead of embedding it in the XPath expression:
    // XPath 1.0 string literals cannot escape quotes, so an href containing an
    // apostrophe would yield an invalid (or altered) query.
    var anchors = doc.DocumentNode.SelectNodes("//a[@href]") ?? Enumerable.Empty<HtmlNode>();

    // Materialize the matches before mutating any node so the set of links to
    // mark is fixed up front, as it was with the single XPath query.
    var matches = anchors
        .Where(a => string.Equals(a.GetAttributeValue("href", string.Empty), targetHref, StringComparison.Ordinal))
        .ToList();

    foreach (var node in matches)
    {
        // don't add more than one pair of brackets, but ignore any just outside of this element
        node.InnerHtml = "[" + node.InnerHtml.TrimStart('[').TrimEnd(']') + "]";
    }

    return TextCleaner.GetHtmlSource(doc);
}
/// <summary>
/// Finds the links in the supplied HTML source.
/// </summary>
/// <param name="html">The HTML.</param>
/// <returns>
/// One <c>LinkDescription</c> per anchor element that has an href attribute, in the
/// order returned by the node query. Links whose href does not start with "http"
/// (case-insensitive) are flagged as <c>LinkCheckSummary.Ignored</c>.
/// </returns>
public static List<LinkDescription> FindLinks(string html)
{
    var found = new List<LinkDescription>();

    var document = TextCleaner.CreateHtmlDocument(html);
    var anchors = document.DocumentNode.SelectNodes("//a[@href]");
    if (anchors == null)
    {
        // The query yields null rather than an empty collection when nothing matches.
        return found;
    }

    foreach (var anchor in anchors)
    {
        var description = new LinkDescription
        {
            LinkText = HttpUtility.HtmlDecode(anchor.InnerText),
            OriginalLink = anchor.GetAttributeValue("href", string.Empty)
        };

        var looksLikeHttp = description.OriginalLink.StartsWith("http", StringComparison.OrdinalIgnoreCase);
        if (!looksLikeHttp)
        {
            // neither "http" nor "https"
            description.Result = LinkCheckSummary.Ignored;
        }

        found.Add(description);
    }

    return found;
}