Exemplo n.º 1
0
        /// <summary>
        /// Rewrites link targets in <paramref name="document"/> in place: the <c>href</c> of every
        /// anchor whose href does not start with "http", and the <c>src</c> of every image, is
        /// replaced by the result of <c>UrlUtils.CanonicalizeUrl</c>.
        /// </summary>
        /// <param name="document">Document whose anchor and image attributes are updated.</param>
        /// <param name="url">Passed as the second argument to <c>UrlUtils.CanonicalizeUrl</c>
        /// (presumably the page URL used as the resolution base; confirm against UrlUtils).</param>
        private void FixAllRelativeHrefs(HtmlDocument document, string url)
        {
            // Anchors first. The query also matches anchors without an href, hence the null check below.
            var anchors = document.DocumentNode.SelectNodes("//a[not(starts-with(@href,'http') or starts-with(@href,'https'))]");
            if (anchors != null)
            {
                foreach (var anchor in anchors)
                {
                    var href = anchor.Attributes["href"];
                    if (href == null)
                    {
                        continue;
                    }

                    href.Value = UrlUtils.CanonicalizeUrl(href.Value, url);
                }
            }

            // Then every image, regardless of what its src currently looks like.
            var pictures = document.DocumentNode.SelectNodes(".//img");
            if (pictures != null)
            {
                foreach (var picture in pictures)
                {
                    var src = picture.Attributes["src"];
                    if (src == null)
                    {
                        continue;
                    }

                    src.Value = UrlUtils.CanonicalizeUrl(src.Value, url);
                }
            }
        }
Exemplo n.º 2
0
 /// <summary>
 /// Adds the given URLs to <c>TargetRequests</c> with the supplied priority.
 /// </summary>
 /// <remarks>
 /// Null or empty entries, a bare "#", and <c>javascript:</c> pseudo-links are skipped.
 /// Every other entry is passed through <c>UrlUtils.CanonicalizeUrl</c> together with
 /// <c>Url</c>, and the resulting request is added only when its <c>IsAvailable</c> is true.
 /// The scheme check is ordinal and case-insensitive, so variants such as
 /// "JavaScript:void(0)" are skipped as well.
 /// </remarks>
 /// <param name="requests">URLs to add; a null or empty list is a no-op.</param>
 /// <param name="priority">Value assigned to <c>Priority</c> of every created request.</param>
 public void AddTargetRequests(IList <string> requests, int priority)
 {
     if (requests == null || requests.Count == 0)
     {
         return;
     }
     // lock (this) is kept as-is: other members of the class may synchronize on the
     // same monitor (not visible from here), so switching lock objects is not safe.
     lock (this)
     {
         foreach (string s in requests)
         {
             // URI schemes are case-insensitive, and the comparison must not depend on
             // the current culture.
             if (string.IsNullOrEmpty(s) || s.Equals("#")
                 || s.StartsWith("javascript:", System.StringComparison.OrdinalIgnoreCase))
             {
                 continue;
             }
             string  s1      = UrlUtils.CanonicalizeUrl(s, Url);
             Request request = new Request(s1, Request.Extras)
             {
                 Priority = priority, Depth = Request.NextDepth
             };
             if (request.IsAvailable)
             {
                 TargetRequests.Add(request);
             }
         }
     }
 }
Exemplo n.º 3
0
 /// <summary>
 /// Queues a single URL to fetch.
 /// </summary>
 /// <remarks>
 /// A null, empty or "#" value is ignored. Any other value is passed through
 /// <c>UrlUtils.CanonicalizeUrl</c> together with <c>_url.ToString()</c>, wrapped in a new
 /// <c>Request</c> and appended to <c>_targetRequests</c>.
 /// </remarks>
 /// <param name="requestString">URL to add.</param>
 public void AddTargetRequest(string requestString)
 {
     bool isUsable = !string.IsNullOrEmpty(requestString) && !requestString.Equals("#");
     if (isUsable)
     {
         string canonicalUrl = UrlUtils.CanonicalizeUrl(requestString, _url.ToString());
         _targetRequests.Add(new Request(canonicalUrl));
     }
 }
Exemplo n.º 4
0
 /// <summary>
 /// Adds the given URLs to <c>_targetRequests</c>.
 /// </summary>
 /// <remarks>
 /// Null or empty entries, a bare "#", and <c>javascript:</c> pseudo-links (matched
 /// case-insensitively) are skipped. Every other entry is passed through
 /// <c>UrlUtils.CanonicalizeUrl</c> together with <c>_url.ToString()</c>; the created
 /// request receives <c>_request?.Extras</c>.
 /// </remarks>
 /// <param name="requests">URLs to add; a null list is a no-op.</param>
 public void AddTargetRequests(IList <string> requests)
 {
     // Enumerating a null list would throw NullReferenceException; treat it as "nothing to add".
     if (requests == null)
     {
         return;
     }

     foreach (string s in requests)
     {
         // URI schemes are case-insensitive, and the comparison must not depend on
         // the current culture.
         if (string.IsNullOrEmpty(s) || s.Equals("#")
             || s.StartsWith("javascript:", System.StringComparison.OrdinalIgnoreCase))
         {
             continue;
         }
         string s1 = UrlUtils.CanonicalizeUrl(s, _url.ToString());
         _targetRequests.Add(new Request(s1, _request?.Extras));
     }
 }
Exemplo n.º 5
0
        /// <summary>
        /// Queues a single URL to fetch.
        /// </summary>
        /// <remarks>
        /// Runs entirely under <c>lock (this)</c>. A null, empty or "#" value is ignored. Any other
        /// value is passed through <c>UrlUtils.CanonicalizeUrl</c> together with <c>Url</c>, and a
        /// request built from the result, <c>Request.NextDepth</c> and <c>Request.Extras</c> is
        /// appended to <c>TargetRequests</c>.
        /// </remarks>
        /// <param name="requestString">URL to add.</param>
        public void AddTargetRequest(string requestString)
        {
            lock (this)
            {
                if (!string.IsNullOrEmpty(requestString) && !requestString.Equals("#"))
                {
                    string canonicalUrl = UrlUtils.CanonicalizeUrl(requestString, Url);
                    TargetRequests.Add(new Request(canonicalUrl, Request.NextDepth, Request.Extras));
                }
            }
        }
Exemplo n.º 6
0
 /// <summary>
 /// Adds the given URLs to <c>_targetRequests</c> with the supplied priority.
 /// </summary>
 /// <remarks>
 /// Null or empty entries, a bare "#", and <c>javascript:</c> pseudo-links (matched
 /// case-insensitively) are skipped. Every other entry is passed through
 /// <c>UrlUtils.CanonicalizeUrl</c> together with <c>_url.ToString()</c>.
 /// Note that each accepted entry of <paramref name="requests"/> is overwritten in place
 /// with its canonicalized form; this side effect is kept for backward compatibility.
 /// </remarks>
 /// <param name="requests">URLs to add; a null list is a no-op. Accepted entries are modified in place.</param>
 /// <param name="priority">Value passed to <c>SetPriority</c> on every created request.</param>
 public void AddTargetRequests(List <string> requests, long priority)
 {
     // Reading Count on a null list would throw NullReferenceException; treat it as "nothing to add".
     if (requests == null)
     {
         return;
     }

     for (var i = 0; i < requests.Count; ++i)
     {
         var s = requests[i];
         // URI schemes are case-insensitive, and the comparison must not depend on
         // the current culture.
         if (string.IsNullOrEmpty(s) || s.Equals("#")
             || s.StartsWith("javascript:", System.StringComparison.OrdinalIgnoreCase))
         {
             continue;
         }

         string canonicalUrl = UrlUtils.CanonicalizeUrl(s, _url.ToString());
         // Written back into the caller's list, exactly as the previous implementation did.
         requests[i] = canonicalUrl;
         _targetRequests.Add(new Request(canonicalUrl).SetPriority(priority));
     }
 }
Exemplo n.º 7
0
 /// <summary>
 /// Adds the given URLs to <c>TargetRequests</c>.
 /// </summary>
 /// <remarks>
 /// Null or empty entries, a bare "#", and <c>javascript:</c> pseudo-links (matched
 /// case-insensitively) are skipped. Every other entry is passed through
 /// <c>UrlUtils.CanonicalizeUrl</c> together with <c>Url</c>; the created request receives
 /// <c>Request.NextDepth</c> and <c>Request.Extras</c>.
 /// </remarks>
 /// <param name="requests">URLs to add; a null list is a no-op.</param>
 public void AddTargetRequests(IList <string> requests)
 {
     // Enumerating a null list would throw NullReferenceException; treat it as "nothing to add".
     if (requests == null)
     {
         return;
     }

     // lock (this) is kept as-is: other members of the class may synchronize on the
     // same monitor (not visible from here), so switching lock objects is not safe.
     lock (this)
     {
         foreach (string s in requests)
         {
             // URI schemes are case-insensitive, and the comparison must not depend on
             // the current culture.
             if (string.IsNullOrEmpty(s) || s.Equals("#")
                 || s.StartsWith("javascript:", System.StringComparison.OrdinalIgnoreCase))
             {
                 continue;
             }
             string s1 = UrlUtils.CanonicalizeUrl(s, Url);
             TargetRequests.Add(new Request(s1, Request.NextDepth, Request.Extras));
         }
     }
 }
Exemplo n.º 8
0
        // Too many problems with this approach; if it is needed, move the handling into the
        // entity class's Expression instead.
        /// <summary>
        /// Replaces the <c>href</c> of every anchor in <c>Document</c> whose href does not start
        /// with "http" by the result of <c>UrlUtils.CanonicalizeUrl</c>.
        /// </summary>
        /// <param name="url">Passed as the second argument to <c>UrlUtils.CanonicalizeUrl</c>
        /// (presumably the page URL used as the resolution base; confirm against UrlUtils).</param>
        internal void FixAllRelativeHrefs(string url)
        {
            var anchors = Document.SelectNodes("//a[not(starts-with(@href,'http') or starts-with(@href,'https'))]");
            if (anchors == null)
            {
                return;
            }

            foreach (var anchor in anchors)
            {
                // The query also matches anchors that have no href at all.
                var href = anchor.Attributes["href"];
                if (href != null)
                {
                    href.Value = UrlUtils.CanonicalizeUrl(href.Value, url);
                }
            }
        }
Exemplo n.º 9
0
        // Checks how UrlUtils.CanonicalizeUrl resolves query-only, parent-relative and
        // dot-prefixed references against a base URL.
        public void TestFixRelativeUrl()
        {
            const string baseUrl = "http://www.dianping.com/sh/ss/com";

            // Assert.Equal takes (expected, actual); the literal goes first so that a failure
            // message labels the two values correctly.
            string absoluteUrl = UrlUtils.CanonicalizeUrl("?aa", baseUrl);
            Assert.Equal("http://www.dianping.com/sh/ss/com?aa", absoluteUrl);

            absoluteUrl = UrlUtils.CanonicalizeUrl("../aa", baseUrl);
            Assert.Equal("http://www.dianping.com/sh/aa", absoluteUrl);

            // "..aa" is an ordinary file name, not a parent-directory reference.
            absoluteUrl = UrlUtils.CanonicalizeUrl("..aa", baseUrl);
            Assert.Equal("http://www.dianping.com/sh/ss/..aa", absoluteUrl);

            // With a trailing slash on the base, "com/" counts as a directory level.
            absoluteUrl = UrlUtils.CanonicalizeUrl("../../aa", baseUrl + "/");
            Assert.Equal("http://www.dianping.com/sh/aa", absoluteUrl);

            absoluteUrl = UrlUtils.CanonicalizeUrl("../../aa", baseUrl);
            Assert.Equal("http://www.dianping.com/aa", absoluteUrl);
        }
Exemplo n.º 10
0
        /// <summary>
        /// Queues a single URL to fetch.
        /// </summary>
        /// <remarks>
        /// Runs entirely under <c>lock (this)</c>. A null, empty or "#" value is ignored. Any other
        /// value is passed through <c>UrlUtils.CanonicalizeUrl</c> together with <c>Url</c>; the
        /// request built from the result and <c>Request.Extras</c> gets <c>Request.NextDepth</c> as
        /// its depth and is appended to <c>TargetRequests</c> only when its <c>IsAvailable</c> is true.
        /// </remarks>
        /// <param name="requestString">URL to add.</param>
        public void AddTargetRequest(string requestString)
        {
            lock (this)
            {
                bool isSkippable = string.IsNullOrEmpty(requestString) || requestString.Equals("#");
                if (!isSkippable)
                {
                    string canonicalUrl = UrlUtils.CanonicalizeUrl(requestString, Url);
                    var candidate = new Request(canonicalUrl, Request.Extras);
                    candidate.Depth = Request.NextDepth;

                    if (candidate.IsAvailable)
                    {
                        TargetRequests.Add(candidate);
                    }
                }
            }
        }