Esempio n. 1
0
        /// <summary>
        /// Processes a single link found in a document. The link is resolved
        /// against the base URI <c>m_uri</c> and handed to the spider only if
        /// it is an http or https URL on the same host as the base URI.
        /// </summary>
        /// <remarks>
        /// Links that cannot be parsed are reported on the console and skipped.
        /// Filtering of already-visited URLs is presumably done inside
        /// <c>m_spider.addURI</c>; that is not visible from this method.
        /// </remarks>
        /// <param name="link">The absolute or relative URL to process.</param>
        private void ProcessLink(string link)
        {
            Uri url;

            // Fully expand this URL if it was a relative link. The two-argument
            // constructor replaces the obsolete (Uri, string, bool) overload.
            try
            {
                url = new Uri(m_uri, link);
            }
            catch (UriFormatException)
            {
                System.Console.WriteLine("Invalid URI:" + link);
                return;
            }

            // Only web documents are of interest (skips mailto:, ftp:, javascript:, ...).
            // Ordinal comparison is used because schemes are protocol identifiers,
            // not linguistic text, so the current culture must not affect the result.
            string scheme = url.Scheme;
            if (!string.Equals(scheme, "http", StringComparison.OrdinalIgnoreCase) &&
                !string.Equals(scheme, "https", StringComparison.OrdinalIgnoreCase))
            {
                return;
            }

            // comment out this check if you would like to spider
            // the whole Internet (yeah right, but it will try)
            if (!string.Equals(url.Host, m_uri.Host, StringComparison.OrdinalIgnoreCase))
            {
                return;
            }

            m_spider.addURI(url);
        }
Esempio n. 2
0
        /// <summary>
        /// Resolves <paramref name="link"/> against the base URI <c>uri</c> and
        /// passes it to the spider when it is an http or https URL on the same
        /// host as the base URI. All other links are silently ignored.
        /// </summary>
        /// <remarks>
        /// Exceptions thrown by the <see cref="Uri"/> constructor for a malformed
        /// link (for example <see cref="UriFormatException"/>) are not caught
        /// here and propagate to the caller.
        /// </remarks>
        /// <param name="link">The absolute or relative URL to process.</param>
        private void ProcessLink(string link)
        {
            Uri url = new Uri(uri, link);

            // Ordinal, case-insensitive comparison: schemes and host names are
            // protocol identifiers, so the current culture must not influence
            // the match (and no temporary lower-cased strings are allocated).
            string scheme = url.Scheme;
            bool isWebScheme =
                string.Equals(scheme, "http", StringComparison.OrdinalIgnoreCase) ||
                string.Equals(scheme, "https", StringComparison.OrdinalIgnoreCase);
            if (!isWebScheme)
            {
                return;
            }

            // Stay on the host of the base URI.
            if (!string.Equals(url.Host, uri.Host, StringComparison.OrdinalIgnoreCase))
            {
                return;
            }

            spider.addURI(url);
        }