/// <summary>
/// Rewrites anchor <c>href</c> and image <c>src</c> attribute values in place by
/// passing each one through <c>UrlUtil.CanonicalizeUrl</c> together with <paramref name="url"/>.
/// </summary>
/// <param name="document">Parsed HTML document whose attributes are updated.</param>
/// <param name="url">Second argument forwarded to <c>UrlUtil.CanonicalizeUrl</c> (presumably the page's base URL; confirm against that helper).</param>
private void FixAllRelativeHrefs(HtmlDocument document, string url)
{
    // Select anchors whose href does not begin with "http"; anchors lacking an
    // href also match the predicate, hence the per-node attribute check below.
    var anchors = document.DocumentNode.SelectNodes("//a[not(starts-with(@href,'http') or starts-with(@href,'https'))]");
    if (anchors != null)
    {
        foreach (var anchor in anchors)
        {
            var href = anchor.Attributes["href"];
            if (href == null)
            {
                continue;
            }

            href.Value = UrlUtil.CanonicalizeUrl(href.Value, url);
        }
    }

    // Every <img> is visited; its src is canonicalized regardless of its current form.
    var imageNodes = document.DocumentNode.SelectNodes(".//img");
    if (imageNodes != null)
    {
        foreach (var imageNode in imageNodes)
        {
            var src = imageNode.Attributes["src"];
            if (src == null)
            {
                continue;
            }

            src.Value = UrlUtil.CanonicalizeUrl(src.Value, url);
        }
    }
}
/// <summary>
/// Adds a target link found during parsing to the queue: the link is canonicalized
/// against <c>Url</c>, wrapped in a new <c>Request</c>, and forwarded to the
/// <c>AddTargetRequest(Request, bool)</c> overload.
/// </summary>
/// <remarks>
/// Links that are null, empty, whitespace-only, exactly <c>"#"</c>, or that use the
/// <c>javascript:</c> scheme (matched case-insensitively) are silently ignored.
/// </remarks>
/// <param name="url">The link to add.</param>
/// <param name="priority">Priority assigned to the created request.</param>
/// <param name="increaseDeep">Whether the depth of the target link should be increased.</param>
public void AddTargetRequest(string url, int priority = 0, bool increaseDeep = true)
{
    // URI schemes are case-insensitive, so "JavaScript:void(0)" must be filtered
    // just like "javascript:void(0)". An ordinal comparison also keeps the check
    // independent of the current culture.
    if (string.IsNullOrWhiteSpace(url)
        || url.Equals("#")
        || url.StartsWith("javascript:", System.StringComparison.OrdinalIgnoreCase))
    {
        return;
    }

    var newUrl = UrlUtil.CanonicalizeUrl(url, Url);
    var request = new Request(newUrl, Request.Extras) { Priority = priority };
    AddTargetRequest(request, increaseDeep);
}