Esempio n. 1
0
        /// <summary>
        /// Updates the href for simple redirects: every anchor whose <c>href</c> attribute
        /// equals <paramref name="linkHref"/> is pointed at <paramref name="newHref"/> instead.
        /// </summary>
        /// <param name="htmlSource">The HTML source.</param>
        /// <param name="linkHref">
        /// The link href to look for. It is compared exactly (case-sensitive) and may contain
        /// any characters, including quotes. <c>null</c> is treated as an empty href.
        /// </param>
        /// <param name="newHref">The new href.</param>
        /// <returns>
        /// The HTML produced by <c>TextCleaner.GetHtmlSource</c> for the document after the
        /// matching anchors have been updated.
        /// </returns>
        public static string UpdateHref(string htmlSource, string linkHref, string newHref)
        {
            var doc = TextCleaner.CreateHtmlDocument(htmlSource);

            // Treat null like an empty href so callers passing null keep matching empty href attributes.
            var targetHref = linkHref ?? string.Empty;

            // Select every anchor that has an href and compare the value in code. Embedding the
            // href in the XPath as a quoted literal breaks for values containing an apostrophe,
            // because XPath 1.0 has no escape sequence inside string literals.
            foreach (var node in doc.DocumentNode.SelectNodes("//a[@href]") ?? Enumerable.Empty<HtmlNode>())
            {
                var href = node.Attributes["href"];
                if (href.Value == targetHref)
                {
                    href.Value = newHref;
                }
            }

            return TextCleaner.GetHtmlSource(doc);
        }
Esempio n. 2
0
        /// <summary>
        /// Marks the specified invalid link with square brackets by wrapping the inner HTML of
        /// every anchor whose <c>href</c> attribute equals <paramref name="linkHref"/>.
        /// </summary>
        /// <param name="htmlSource">The HTML source to update.</param>
        /// <param name="linkHref">
        /// The link href to alter. It is compared exactly (case-sensitive) and may contain
        /// any characters, including quotes. <c>null</c> is treated as an empty href.
        /// </param>
        /// <returns>
        /// The HTML produced by <c>TextCleaner.GetHtmlSource</c> for the document after the
        /// matching anchors have been bracketed.
        /// </returns>
        public static string MarkInvalid(string htmlSource, string linkHref)
        {
            var doc = TextCleaner.CreateHtmlDocument(htmlSource);

            // Treat null like an empty href so callers passing null keep matching empty href attributes.
            var targetHref = linkHref ?? string.Empty;

            // Select every anchor that has an href and compare the value in code. Embedding the
            // href in the XPath as a quoted literal breaks for values containing an apostrophe,
            // because XPath 1.0 has no escape sequence inside string literals.
            foreach (var node in doc.DocumentNode.SelectNodes("//a[@href]") ?? Enumerable.Empty<HtmlNode>())
            {
                if (node.Attributes["href"].Value != targetHref)
                {
                    continue;
                }

                // don't add more than one pair of brackets, but ignore any just outside of this element
                node.InnerHtml = "[" + node.InnerHtml.TrimStart('[').TrimEnd(']') + "]";
            }

            return TextCleaner.GetHtmlSource(doc);
        }
Esempio n. 3
0
        /// <summary>
        /// Finds the links in the supplied HTML source.
        /// </summary>
        /// <param name="html">The HTML.</param>
        /// <returns>
        /// One <c>LinkDescription</c> per anchor element that has an <c>href</c> attribute, carrying
        /// the HTML-decoded link text and the href value. Links whose href does not start with
        /// <c>http://</c> or <c>https://</c> have their result set to
        /// <c>LinkCheckSummary.Ignored</c>. The list is empty when no such anchors exist.
        /// </returns>
        public static List<LinkDescription> FindLinks(string html)
        {
            var doc = TextCleaner.CreateHtmlDocument(html);
            var result = new List<LinkDescription>();

            // SelectNodes returns null rather than an empty collection when nothing matches.
            foreach (var link in doc.DocumentNode.SelectNodes("//a[@href]") ?? Enumerable.Empty<HtmlNode>())
            {
                var lnk = new LinkDescription
                {
                    LinkText = HttpUtility.HtmlDecode(link.InnerText),
                    OriginalLink = link.GetAttributeValue("href", string.Empty)
                };

                // Require the full scheme prefix: a bare "http" test would also accept relative
                // paths such as "httpd-guide.html" or other schemes that merely begin with "http".
                var isHttpLink = lnk.OriginalLink.StartsWith("http://", StringComparison.OrdinalIgnoreCase)
                                 || lnk.OriginalLink.StartsWith("https://", StringComparison.OrdinalIgnoreCase);

                if (!isHttpLink)
                {
                    // neither "http" nor "https"
                    lnk.Result = LinkCheckSummary.Ignored;
                }

                result.Add(lnk);
            }

            return result;
        }