Beispiel #1
0
        /// <summary>
        /// Called when the spider finds a URL. Accepts the URL only when it
        /// is on the configured base host, or when no base host is set.
        /// </summary>
        /// <param name="url">The URL that was found.</param>
        /// <param name="source">Where the URL was found. Not used by this implementation.</param>
        /// <param name="type">What sort of tag produced this URL. Not used by this implementation.</param>
        /// <returns>
        /// True if no base host is set or the URL's host equals the base host
        /// (ignoring case); false otherwise.
        /// </returns>
        public bool SpiderFoundURL(Uri url, Uri source,
                                   Spider.URLType type)
        {
            // Without a base host there is nothing to filter against.
            if (this.baseHost == null)
            {
                return true;
            }

            // Host names are identifiers, not linguistic text: compare them
            // ordinally so the result does not depend on the current culture.
            return String.Equals(this.baseHost, url.Host, StringComparison.OrdinalIgnoreCase);
        }
Beispiel #2
0
        /// <summary>
        /// Used internally, to add a URL to the spider's workload.
        /// The raw link is resolved against the base URL, passed through the
        /// workload's ConvertURL, and then offered to the spider's report
        /// object; only http/https URLs that the report accepts are added,
        /// at one level deeper than the current depth.
        /// </summary>
        /// <param name="u">The URL to add. A null value is ignored.</param>
        /// <param name="type">What type of link this is.</param>
        /// <exception cref="IOException">
        /// Thrown when adding the URL to the spider fails with a
        /// WorkloadException; the original exception is kept as the
        /// inner exception.
        /// </exception>
        private void AddURL(String u, Spider.URLType type)
        {
            if (u == null)
            {
                return;
            }

            try
            {
                Uri url = URLUtility.constructURL(this.baseURL, u, true);
                url = this.spider.Workload.ConvertURL(url.ToString());

                // URI schemes are protocol identifiers, so compare them
                // ordinally rather than with culture-sensitive rules.
                bool isWebScheme =
                    String.Equals(url.Scheme, "http", StringComparison.OrdinalIgnoreCase) ||
                    String.Equals(url.Scheme, "https", StringComparison.OrdinalIgnoreCase);
                if (!isWebScheme)
                {
                    return;
                }

                // Let the report object veto the URL before it is queued.
                if (!this.spider.Report.SpiderFoundURL(url, this.baseURL, type))
                {
                    return;
                }

                try
                {
                    this.spider.AddURL(url, this.baseURL, this.depth + 1);
                }
                catch (WorkloadException e)
                {
                    // Surface the failure as an IOException, keeping the
                    // original exception so its stack trace is not lost.
                    throw new IOException(e.Message, e);
                }
            }
            catch (UriFormatException)
            {
                this.spider.Logging.Log(Logger.Level.INFO, "Malformed URL found:" + u);
            }
            catch (WorkloadException)
            {
                // Reached only for a WorkloadException raised before the add
                // step (the add step's own WorkloadException is rethrown as
                // an IOException above).
                this.spider.Logging.Log(Logger.Level.INFO, "Invalid URL found:" + u);
            }
        }
 /// <summary>
 /// Callback invoked for every URL the spider encounters. This
 /// implementation accepts every URL unconditionally: the spider
 /// is meant to be able to visit, in theory, any URL on the
 /// Internet, so nothing is filtered out here.
 /// </summary>
 /// <param name="url">The URL that the spider found (not inspected).</param>
 /// <param name="source">The page that the URL was found on (not inspected).</param>
 /// <param name="type">The type of link this URL is (not inspected).</param>
 /// <returns>Always true, so that every URL found is processed.</returns>
 public bool SpiderFoundURL(Uri url, Uri source, Spider.URLType type)
 {
     // No filtering: every discovered URL is accepted.
     const bool acceptAll = true;
     return acceptAll;
 }