Пример #1
0
        /// <summary>
        /// Rewrites anchor <c>href</c> and image <c>src</c> attributes of
        /// <paramref name="document"/> in place by passing each value through
        /// <c>UrlUtil.CanonicalizeUrl</c> together with <paramref name="url"/>.
        /// </summary>
        /// <remarks>
        /// Only anchors whose <c>href</c> does not start with "http" are processed,
        /// whereas every <c>img</c> carrying a <c>src</c> attribute is rewritten.
        /// </remarks>
        /// <param name="document">HTML document whose attributes are updated.</param>
        /// <param name="url">
        /// Second argument for the canonicalizer — presumably the address of the page
        /// the document was loaded from (confirm against callers).
        /// </param>
        private void FixAllRelativeHrefs(HtmlDocument document, string url)
        {
            var root = document.DocumentNode;

            // The predicate also matches anchors that have no href at all,
            // so each hit is still checked for the attribute below.
            var anchors = root.SelectNodes("//a[not(starts-with(@href,'http') or starts-with(@href,'https'))]");

            if (anchors != null)
            {
                foreach (var anchor in anchors)
                {
                    var href = anchor.Attributes["href"];
                    if (href == null)
                    {
                        continue;
                    }

                    href.Value = UrlUtil.CanonicalizeUrl(href.Value, url);
                }
            }

            var pictures = root.SelectNodes(".//img");

            if (pictures != null)
            {
                foreach (var picture in pictures)
                {
                    var src = picture.Attributes["src"];
                    if (src == null)
                    {
                        continue;
                    }

                    src.Value = UrlUtil.CanonicalizeUrl(src.Value, url);
                }
            }
        }
Пример #2
0
        /// <summary>
        /// Adds a target link found during parsing to the queue.
        /// </summary>
        /// <remarks>
        /// Blank links, the bare fragment "#", and links using the <c>javascript:</c>
        /// scheme (matched case-insensitively) are silently ignored.
        /// </remarks>
        /// <param name="url">The link.</param>
        /// <param name="priority">Priority assigned to the created request.</param>
        /// <param name="increaseDeep">Whether the depth of the target link is increased.</param>
        public void AddTargetRequest(string url, int priority = 0, bool increaseDeep = true)
        {
            // URI schemes are case-insensitive, so "JavaScript:..." must be rejected
            // as well; an ordinal comparison also avoids culture-dependent matching.
            if (string.IsNullOrWhiteSpace(url)
                || url.Equals("#")
                || url.StartsWith("javascript:", System.StringComparison.OrdinalIgnoreCase))
            {
                return;
            }

            // Canonicalize the link against this instance's Url before building the request.
            var newUrl  = UrlUtil.CanonicalizeUrl(url, Url);
            var request = new Request(newUrl, Request.Extras)
            {
                Priority = priority
            };

            AddTargetRequest(request, increaseDeep);
        }