/// <summary>
/// Processes a single link encountered in the current document.
/// The link is resolved against the base URL (<c>m_uri</c>) and handed to
/// the spider only when it is an http or https URL on the same host as
/// the base URL. Whether an already-visited URL is queued again is up to
/// the spider's <c>addURI</c>; that check is not performed here.
/// </summary>
/// <remarks>
/// A link that is null or cannot be parsed is reported on the console
/// and skipped; no exception is raised for it.
/// </remarks>
/// <param name="link">The URL to process; may be relative to the base URL.</param>
private void ProcessLink(string link)
{
    // Fully expand the link if it is relative. TryCreate signals malformed
    // input through its return value, so bad links found while crawling
    // do not pay the cost of an exception.
    Uri url;
    if (!Uri.TryCreate(m_uri, link, out url))
    {
        System.Console.WriteLine("Invalid URI:" + link);
        return;
    }

    // Only web documents are of interest (this drops mailto:, ftp:, javascript:, ...).
    bool isWebScheme =
        string.Equals(url.Scheme, Uri.UriSchemeHttp, StringComparison.OrdinalIgnoreCase) ||
        string.Equals(url.Scheme, Uri.UriSchemeHttps, StringComparison.OrdinalIgnoreCase);
    if (!isWebScheme)
    {
        return;
    }

    // Stay on the original host. Remove this check to let the crawl
    // follow links to other hosts (i.e. attempt to spider the whole Internet).
    if (!string.Equals(url.Host, m_uri.Host, StringComparison.OrdinalIgnoreCase))
    {
        return;
    }

    m_spider.addURI(url);
}
/// <summary>
/// Processes a single link: resolves it against the base URL (<c>uri</c>)
/// and passes it to <c>spider.addURI</c> only when it is an http or https
/// URL on the same host as the base URL.
/// </summary>
/// <param name="link">The URL to process; may be relative to the base URL.</param>
/// <exception cref="UriFormatException">
/// Thrown by the <see cref="Uri"/> constructor when <paramref name="link"/>
/// cannot be combined with the base URL into a valid URI.
/// </exception>
private void ProcessLink(string link)
{
    // Fully expand the link if it is relative.
    Uri url = new Uri(uri, link);

    // Only web documents are of interest. Ordinal, case-insensitive
    // comparison avoids culture-dependent lowercasing of the scheme.
    bool isWebScheme =
        string.Equals(url.Scheme, Uri.UriSchemeHttp, StringComparison.OrdinalIgnoreCase) ||
        string.Equals(url.Scheme, Uri.UriSchemeHttps, StringComparison.OrdinalIgnoreCase);
    if (!isWebScheme)
    {
        return;
    }

    // Stay on the original host; links to other hosts are ignored.
    if (!string.Equals(url.Host, uri.Host, StringComparison.OrdinalIgnoreCase))
    {
        return;
    }

    spider.addURI(url);
}