/// <summary>
/// Starts the crawl job: stamps and persists the crawl settings, then for each
/// host builds one shared resource bundle (work queue, lock, visited-page and
/// visited-form sets) and launches a pool of asynchronous crawler agents on it.
/// </summary>
public void Crawl()
{
    _crawlSetting.StartTime = DateTime.Now;
    DataLayer.Save(_crawlSetting);

    foreach (var host in _hosts)
    {
        // Per-host state shared by every agent crawling this host.
        var queue = new Queue<Webpage>();
        var gate = new object();
        var seenPages = new HashSet<string>();
        var seenForms = new HashSet<string>();
        var resource = new CrawlingSharedResource(_crawlSetting, host, queue, gate, seenPages, seenForms);

        var agentPool = new CrawlerAgent[_threadsNumber];
        _aliveAgentsDic.Add(host, 0);
        _agentsDic.Add(host, agentPool);

        host.StartTime = DateTime.Now;
        host.CrawlId = _crawlSetting.Id;
        DataLayer.Save(host);

        for (var slot = 0; slot < _threadsNumber; slot++)
        {
            var agent = new CrawlerAgent(resource);
            agent.CrawlAgentCompleted += agent_CrawlAgentCompleted;
            agent.CrawlAnnounced += agent_CrawlAnnounced;
            agent.CrawlAgentStarted += Crawler_CrawlAgentStarted;
            agentPool[slot] = agent;
            agent.CrawlAsync();
        }
    }
}
/// <summary>
/// Creates a crawler agent that operates on the given per-host shared crawling
/// state (settings, host, work queue, lock, and visited-page/form sets).
/// </summary>
/// <param name="sharedResource">Shared crawling state for the host this agent will crawl.</param>
/// <exception cref="ArgumentNullException"><paramref name="sharedResource"/> is null.</exception>
public CrawlerAgent(CrawlingSharedResource sharedResource)
{
    // Guard the only dependency up front so a null surfaces here rather than
    // later inside CrawlAsync. NOTE(review): the previous XML docs described
    // parameters (crawlerSetting, host, threadsNumber) that this constructor
    // does not have; they were rewritten to match the actual signature.
    _sharedResource = sharedResource ?? throw new ArgumentNullException(nameof(sharedResource));
}
/// <summary>
/// Kicks off crawling across all configured hosts. Each host receives its own
/// <c>CrawlingSharedResource</c> (queue, lock and de-duplication sets), and
/// <c>_threadsNumber</c> agents are created, wired to the progress events, and
/// started asynchronously against it.
/// </summary>
public void Crawl()
{
    _crawlSetting.StartTime = DateTime.Now;
    DataLayer.Save(_crawlSetting);

    for (int hostIndex = 0; hostIndex < _hosts.Length; hostIndex++)
    {
        var currentHost = _hosts[hostIndex];

        // Resources shared by all agents assigned to this host.
        var pendingPages = new Queue<Webpage>();
        var syncRoot = new object();
        var visitedPageHashes = new HashSet<string>();
        var visitedFormHashes = new HashSet<string>();
        var shared = new CrawlingSharedResource(
            _crawlSetting, currentHost, pendingPages, syncRoot, visitedPageHashes, visitedFormHashes);

        var hostAgents = new CrawlerAgent[_threadsNumber];
        _aliveAgentsDic.Add(currentHost, 0);
        _agentsDic.Add(currentHost, hostAgents);

        currentHost.StartTime = DateTime.Now;
        currentHost.CrawlId = _crawlSetting.Id;
        DataLayer.Save(currentHost);

        for (int agentIndex = 0; agentIndex < _threadsNumber; agentIndex++)
        {
            hostAgents[agentIndex] = new CrawlerAgent(shared);
            hostAgents[agentIndex].CrawlAgentCompleted += agent_CrawlAgentCompleted;
            hostAgents[agentIndex].CrawlAnnounced += agent_CrawlAnnounced;
            hostAgents[agentIndex].CrawlAgentStarted += Crawler_CrawlAgentStarted;
            hostAgents[agentIndex].CrawlAsync();
        }
    }
}