/// <summary>
/// Creates a spider that starts at <paramref name="startUri"/> and hooks its
/// link handler into the supplied page processor.
/// </summary>
/// <param name="startUri">URI string passed to <see cref="Uri"/> to build the start URI.</param>
/// <param name="baseUri">
/// URI string used as the base URI. When <c>Is.EmptyString</c> reports it as empty,
/// the scheme-and-authority part of the start URI is used instead.
/// </param>
/// <param name="uriProcessedCountMax">Value stored in <c>UriProcessedCountMax</c>.</param>
/// <param name="keepWebContent">Value stored in <c>KeepWebContent</c>.</param>
/// <param name="webPageProcessor">
/// Processor whose <c>ContentHandler</c> event receives this spider's <c>HandleLinks</c> callback.
/// </param>
/// <exception cref="ArgumentNullException">
/// <paramref name="webPageProcessor"/> is <c>null</c>.
/// </exception>
public WebSpider(string startUri, string baseUri, int uriProcessedCountMax, bool keepWebContent, IWebPageProcessor webPageProcessor)
{
    // Fail fast with a descriptive exception instead of a NullReferenceException
    // at the event subscription below.
    if (webPageProcessor == null)
    {
        throw new ArgumentNullException("webPageProcessor");
    }

    StartUri = new Uri(startUri);

    // In future this could be null and will process cross-site, but for now must exist.
    // NOTE(review): reads m_startUri directly; presumably the StartUri setter above
    // has populated it - confirm against the property definition.
    BaseUri = new Uri(Is.EmptyString(baseUri) ? m_startUri.GetLeftPart(UriPartial.Authority) : baseUri);

    UriProcessedCountMax = uriProcessedCountMax;
    KeepWebContent = keepWebContent;

    m_webPagesPending = new Queue();
    m_webPages = new Hashtable();

    m_webPageProcessor = webPageProcessor;
    m_webPageProcessor.ContentHandler += new WebPageContentDelegate(this.HandleLinks);
}
/// <summary>
/// Creates a spider that starts at <paramref name="startUri"/> and hooks its
/// link handler into the supplied page processor.
/// </summary>
/// <param name="startUri">URI string passed to <see cref="Uri"/> to build the start URI.</param>
/// <param name="baseUri">
/// URI string used as the base URI. When <c>Is.EmptyString</c> reports it as empty,
/// the scheme-and-authority part of the start URI is used instead.
/// </param>
/// <param name="uriProcessedCountMax">Value stored in <c>uriProcessedCountMax_</c>.</param>
/// <param name="keepWebContent">Value stored in <c>keepWebContent_</c>.</param>
/// <param name="webPageProcessor">
/// Processor whose <c>contentHandler_</c> member receives this spider's <c>HandleLinks</c> callback.
/// </param>
/// <exception cref="ArgumentNullException">
/// <paramref name="webPageProcessor"/> is <c>null</c>.
/// </exception>
public WebSpider(string startUri, string baseUri, int uriProcessedCountMax, bool keepWebContent, IWebPageProcessor webPageProcessor)
{
    // Fail fast with a descriptive exception instead of a NullReferenceException
    // at the handler subscription below.
    if (webPageProcessor == null)
    {
        throw new ArgumentNullException("webPageProcessor");
    }

    startUri_ = new Uri(startUri);

    // In future this could be null and will process cross-site, but for now must exist.
    baseUri_ = new Uri(Is.EmptyString(baseUri) ? startUri_.GetLeftPart(UriPartial.Authority) : baseUri);

    uriProcessedCountMax_ = uriProcessedCountMax;
    keepWebContent_ = keepWebContent;

    webPagesPending_ = new Queue();
    webPages_ = new Hashtable();

    webPageProcessor_ = webPageProcessor;
    webPageProcessor_.contentHandler_ += new WebPageContentDelegate(this.HandleLinks);
}