/// <summary>
/// Extracts the hyperlinks of an HTML page and splits them into "all links"
/// and "links that belong to the same domain as the source page".
/// </summary>
/// <param name="page">Raw HTML markup of the page; may be null, empty or whitespace.</param>
/// <param name="sourceUrl">URL the page was loaded from; passed to <c>URIHelper</c> to resolve and classify links.</param>
/// <returns>
/// A <c>ParseResult</c> built from (1) every distinct resolved link and
/// (2) the subset for which <c>URIHelper.IsLinkLocalForDomain</c> returns true.
/// Both arrays are empty when the page is blank or contains no anchors with an href.
/// </returns>
ParseResult ILinkParserStrategy.GetLinks(string page, string sourceUrl)
        {
            if (string.IsNullOrWhiteSpace(page))
            {
                return(new ParseResult(new string[0], new string[0]));
            }

            HtmlDocument doc = new HtmlDocument();

            doc.LoadHtml(page);

            // Only anchors that actually carry an href are relevant; selecting them in
            // XPath avoids feeding a placeholder value into the URL resolution below.
            var anchors = doc.DocumentNode.SelectNodes("//a[@href]");

            // SelectNodes yields null (not an empty collection) when nothing matches,
            // so a page without links must be handled before running LINQ on it.
            if (anchors == null)
            {
                return(new ParseResult(new string[0], new string[0]));
            }

            var links = anchors
                        .Select(p => p.GetAttributeValue("href", string.Empty))
                        .Distinct() // skip resolving identical raw hrefs more than once
                        .Select(x => URIHelper.GetAbsoluteLink(x, sourceUrl))
                        .Where(x => x != null)
                        .Distinct() // different raw hrefs can resolve to the same absolute link
                        .ToArray();

            var linksToProceed = links
                                 .Where(link => URIHelper.IsLinkLocalForDomain(link, sourceUrl))
                                 .ToArray();

            return(new ParseResult(links, linksToProceed));
        }
예제 #2
0
        /// <summary>
        /// Extracts the hyperlinks of an HTML page, drops those containing the configured
        /// excluded value, and splits the rest into "all links" and "links that belong to
        /// the same domain as the source page".
        /// </summary>
        /// <param name="page">Raw HTML markup of the page; may be null, empty or whitespace.</param>
        /// <param name="sourceUrl">URL the page was loaded from; passed to <c>URIHelper</c> to resolve and classify links.</param>
        /// <returns>
        /// A <c>ParseResult</c> built from (1) every distinct resolved link that does not
        /// contain <c>_excludedValue</c> (case-insensitive) and (2) the subset for which
        /// <c>URIHelper.IsLinkLocalForDomain</c> returns true. Both arrays are empty when
        /// the page is blank or contains no anchors with an href.
        /// </returns>
        public ParseResult GetLinks(string page, string sourceUrl)
        {
            if (string.IsNullOrWhiteSpace(page))
            {
                return(new ParseResult(new string[0], new string[0]));
            }

            HtmlDocument doc = new HtmlDocument();

            doc.LoadHtml(page);

            // Only anchors that actually carry an href are relevant; selecting them in
            // XPath avoids feeding a placeholder value into the URL resolution below.
            var anchors = doc.DocumentNode.SelectNodes("//a[@href]");

            // SelectNodes yields null (not an empty collection) when nothing matches,
            // so a page without links must be handled before running LINQ on it.
            if (anchors == null)
            {
                return(new ParseResult(new string[0], new string[0]));
            }

            var links = anchors
                        .Select(p => p.GetAttributeValue("href", string.Empty))
                        .Distinct() // skip resolving identical raw hrefs more than once
                        .Select(x => URIHelper.GetAbsoluteLink(x, sourceUrl))
                        .Where(x => x != null && !x.Contains(_excludedValue, StringComparison.InvariantCultureIgnoreCase))
                        .Distinct() // different raw hrefs can resolve to the same absolute link
                        .ToArray();

            var linksToProceed = links
                                 .Where(link => URIHelper.IsLinkLocalForDomain(link, sourceUrl))
                                 .ToArray();

            return(new ParseResult(links, linksToProceed));
        }