Exemplo n.º 1
0
        /// <summary>
        /// Spiders the site starting at the given URL, with
        /// <c>MemoryWorkloadManager</c> selected as the workload manager,
        /// then writes the spider's status to the console followed by
        /// either the list of bad links collected by the report or a
        /// message saying that none were found.
        /// </summary>
        /// <param name="url">The URL to check for bad links.</param>
        public void check(Uri url)
        {
            SpiderOptions spiderOptions = new SpiderOptions();
            spiderOptions.WorkloadManager = typeof(MemoryWorkloadManager).FullName;

            LinkReport linkReport = new LinkReport();
            Spider crawler = new Spider(spiderOptions, linkReport);

            // Seed the workload with the starting URL (no source page, depth 1).
            crawler.AddURL(url, null, 1);
            crawler.Process();
            Console.WriteLine(crawler.Status);

            // Nothing bad was recorded: say so and stop.
            if (linkReport.Bad.Count == 0)
            {
                Console.WriteLine("No bad links were found.");
                return;
            }

            Console.WriteLine("Bad Links Found:");
            foreach (string badLink in linkReport.Bad)
            {
                Console.WriteLine(badLink);
            }
        }
 /// <summary>
 /// Creates a SpiderParseHTML over the given input stream. The
 /// constructor remembers the stream, the spider and the base URL,
 /// and looks up the depth that the spider's workload reports for
 /// the base URL.
 /// </summary>
 /// <param name="baseURL">The URL being parsed; its depth is read from the spider's workload.</param>
 /// <param name="istream">The input stream being parsed; also passed to the base constructor.</param>
 /// <param name="spider">The spider that is doing the parsing.</param>
 public SpiderParseHTML(Uri baseURL, SpiderInputStream istream, Spider spider)
     : base(istream)
 {
     this.baseURL = baseURL;
     this.spider = spider;
     this.stream = istream;

     // Depth of the page being parsed, as reported by the workload.
     this.depth = spider.Workload.GetDepth(baseURL);
 }
 /// <summary>
 /// Runs a spider that starts at <paramref name="baseHost"/> and
 /// reports to a <c>WorldSpiderReport</c> created for the given local
 /// path, then writes the spider's status to the console.
 /// </summary>
 /// <param name="config">The spider configuration file to load the options from.</param>
 /// <param name="baseHost">The URL to start from.</param>
 /// <param name="local">The local path handed to the report.</param>
 public void Download(String config, Uri baseHost, String local)
 {
     WorldSpiderReport worldReport = new WorldSpiderReport(local);

     SpiderOptions settings = new SpiderOptions();
     settings.Load(config);

     Spider crawler = new Spider(settings, worldReport);

     // Seed the workload with the starting URL (no source page, depth 1).
     crawler.AddURL(baseHost, null, 1);
     crawler.Process();

     Console.WriteLine(crawler.Status);
 }
        /// <summary>
        /// Runs a spider that starts at <paramref name="baseURL"/> and
        /// reports to a <c>SpiderReport</c> created for the given local
        /// path. Console logging is switched on, the log file is set to
        /// <c>c:\spider.log</c> and the log is cleared before the spider
        /// runs. The spider's status is written to the console at the end.
        /// </summary>
        /// <param name="config">The spider configuration file to load the options from.</param>
        /// <param name="baseURL">The URL to start from.</param>
        /// <param name="local">The local path handed to the report.</param>
        public void Download(String config, Uri baseURL, String local)
        {
            SpiderReport siteReport = new SpiderReport(local);

            SpiderOptions settings = new SpiderOptions();
            settings.Load(config);

            Spider crawler = new Spider(settings, siteReport);

            // Configure logging before any URL is processed.
            crawler.Logging.Console = true;
            crawler.Logging.Filename = "c:\\spider.log";
            crawler.Logging.Clear();

            // Seed the workload with the starting URL (no source page, depth 1).
            crawler.AddURL(baseURL, null, 1);
            crawler.Process();

            Console.WriteLine(crawler.Status);
        }
Exemplo n.º 5
0
        /// <summary>
        /// Called when the spider finds a URL. Accepts the URL only when
        /// its host matches <c>baseHost</c>, or when no base host is set.
        /// </summary>
        /// <param name="url">The URL that was found.</param>
        /// <param name="source">Where the URL was found. Not used by this implementation.</param>
        /// <param name="type">What sort of tag produced this URL. Not used by this implementation.</param>
        /// <returns>
        /// <c>true</c> when <c>baseHost</c> is <c>null</c> or equals the
        /// host of <paramref name="url"/> (ignoring case); otherwise
        /// <c>false</c>.
        /// </returns>
        public bool SpiderFoundURL(Uri url, Uri source,
            Spider.URLType type)
        {
            // No base host configured: nothing to filter on.
            if (this.baseHost == null)
            {
                return true;
            }

            // Host names are culture-neutral identifiers, so compare them
            // ordinally. A culture-sensitive, case-insensitive comparison can
            // give the wrong answer for hosts containing 'i'/'I' when the
            // current culture is, for example, Turkish.
            return String.Equals(this.baseHost, url.Host, StringComparison.OrdinalIgnoreCase);
        }
Exemplo n.º 6
0
 /// <summary>
 /// Startup hook called with the spider that will be working with
 /// this object. The spider is stored in this object's
 /// <c>spider</c> field.
 /// </summary>
 /// <param name="spider">The spider that will be working with this object.</param>
 public void Init(Spider spider)
 {
     // Remember the spider handed to us at startup.
     this.spider = spider;
 }
        /// <summary>
        /// Used internally to add a URL to the spider's workload. The raw
        /// link text is resolved against <c>baseURL</c>, converted by the
        /// workload, and, if it is an http or https URL that the report
        /// accepts, queued on the spider one level deeper than the
        /// current page.
        /// </summary>
        /// <param name="u">The URL text to add. A <c>null</c> value is ignored.</param>
        /// <param name="type">What type of link this is.</param>
        /// <exception cref="IOException">
        /// Thrown when queuing the URL on the spider fails with a
        /// <c>WorkloadException</c>; the original exception is attached
        /// as the inner exception.
        /// </exception>
        private void AddURL(String u, Spider.URLType type)
        {
            if (u == null)
            {
                return;
            }

            try
            {
                Uri url = URLUtility.constructURL(this.baseURL, u, true);
                url = this.spider.Workload.ConvertURL(url.ToString());

                // Only http and https URLs are passed on. URI schemes are
                // culture-neutral tokens, so they are compared ordinally.
                bool isHttpOrHttps =
                    String.Equals(url.Scheme, Uri.UriSchemeHttp, StringComparison.OrdinalIgnoreCase)
                    || String.Equals(url.Scheme, Uri.UriSchemeHttps, StringComparison.OrdinalIgnoreCase);
                if (!isHttpOrHttps)
                {
                    return;
                }

                // The report decides whether this URL is to be queued at all.
                if (this.spider.Report.SpiderFoundURL(url, this.baseURL, type))
                {
                    try
                    {
                        this.spider.AddURL(url, this.baseURL, this.depth + 1);
                    }
                    catch (WorkloadException e)
                    {
                        // Surface queuing failures as I/O errors, keeping the
                        // original exception so its stack trace is not lost.
                        throw new IOException(e.Message, e);
                    }
                }
            }
            catch (UriFormatException)
            {
                this.spider.Logging.Log(Logger.Level.INFO, "Malformed URL found:" + u);
            }
            catch (WorkloadException)
            {
                // Reached for workload failures raised outside the inner try
                // above; the URL is logged and skipped.
                this.spider.Logging.Log(Logger.Level.INFO, "Invalid URL found:" + u);
            }
        }
 /// <summary>
 /// Called when the spider encounters a URL. This implementation
 /// performs no filtering and accepts every URL it is given.
 /// </summary>
 /// <param name="url">The URL that the spider found. Not inspected.</param>
 /// <param name="source">The page that the URL was found on. Not inspected.</param>
 /// <param name="type">The type of link this URL is. Not inspected.</param>
 /// <returns>Always <c>true</c>.</returns>
 public bool SpiderFoundURL(Uri url, Uri source, Spider.URLType type)
 {
     // Every URL is accepted unconditionally.
     return true;
 }
 /// <summary>
 /// Startup hook. This implementation does nothing with the spider
 /// it is given.
 /// </summary>
 /// <param name="spider">Ignored.</param>
 public void Init(Spider spider)
 {
     // Intentionally empty.
 }