/// <summary>
/// Starts the crawl: stamps and persists the crawl setting, then for each host
/// builds the per-host shared crawling state and launches a pool of agents.
/// NOTE(review): DateTime.Now is local time — confirm whether stored timestamps
/// are expected to be UTC.
/// </summary>
public void Crawl()
{
    _crawlSetting.StartTime = DateTime.Now;
    DataLayer.Save(_crawlSetting);

    foreach (Host host in _hosts)
    {
        // State shared by every agent crawling this host: one work queue,
        // one lock, and de-duplication sets for visited pages and forms.
        Queue<Webpage> workQueue = new Queue<Webpage>();
        object gate = new object();
        HashSet<string> visitedPages = new HashSet<string>();
        HashSet<string> visitedForms = new HashSet<string>();
        CrawlingSharedResource shared = new CrawlingSharedResource(
            _crawlSetting, host, workQueue, gate, visitedPages, visitedForms);

        CrawlerAgent[] pool = new CrawlerAgent[_threadsNumber];
        _aliveAgentsDic.Add(host, 0);
        _agentsDic.Add(host, pool);

        host.StartTime = DateTime.Now;
        host.CrawlId = _crawlSetting.Id;
        DataLayer.Save(host);

        // Launch the agent pool; handlers are wired up before the async start.
        for (int n = 0; n < _threadsNumber; n++)
        {
            CrawlerAgent agent = new CrawlerAgent(shared);
            pool[n] = agent;
            agent.CrawlAgentCompleted += agent_CrawlAgentCompleted;
            agent.CrawlAnnounced += agent_CrawlAnnounced;
            agent.CrawlAgentStarted += Crawler_CrawlAgentStarted;
            agent.CrawlAsync();
        }
    }
}
/// <summary>
/// Handles a single agent's "started" event: increments the alive-agent count
/// for that agent's host and, once every host has all of its agents running,
/// raises the crawl-started notification.
/// </summary>
/// <param name="sender">The <c>CrawlerAgent</c> that raised the event.</param>
/// <param name="e">Unused event data.</param>
void Crawler_CrawlAgentStarted(object sender, EventArgs e)
{
    CrawlerAgent agent = sender as CrawlerAgent;
    if (agent == null)
    {
        // Defensive guard: the original unchecked 'as' cast would throw
        // NullReferenceException on the next line for an unexpected sender.
        return;
    }

    Host host = agent.CrawlingSharedResource.Host;
    lock (_lock)
    {
        _aliveAgentsDic[host]++;

        // BUG FIX: Crawl() registers hosts one at a time, so all agents of the
        // first host can start (making All(...) true) before later hosts are
        // even added to the dictionary — firing OnCrawlStarted prematurely.
        // Require every host to be registered before declaring the crawl started.
        if (_aliveAgentsDic.Count == _hosts.Length
            && _aliveAgentsDic.All(kv => kv.Value == _threadsNumber))
        {
            OnCrawlStarted(this);
        }
    }
}
/// <summary>
/// Handles a single agent's "completed" event: decrements the alive-agent count
/// for that agent's host; when the host's last agent finishes, finalizes and
/// persists the host record, and when the last host finishes, finalizes the
/// crawl setting and raises the crawl-completed notification.
/// </summary>
/// <param name="sender">The <c>CrawlerAgent</c> that raised the event.</param>
/// <param name="e">Unused event data.</param>
void agent_CrawlAgentCompleted(object sender, EventArgs e)
{
    CrawlerAgent agent = sender as CrawlerAgent;
    if (agent == null)
    {
        // Defensive guard: the original unchecked 'as' cast would throw
        // NullReferenceException on the next line for an unexpected sender.
        return;
    }

    Host host = agent.CrawlingSharedResource.Host;
    lock (_lock)
    {
        _aliveAgentsDic[host]--;
        if (_aliveAgentsDic[host] == 0)
        {
            // Last agent for this host: finalize and persist the host record.
            // NOTE(review): the host's _aliveAgentsDic entry is never removed,
            // only _agentsDic — completion is driven by _agentsDic.Count below;
            // confirm the stale zero entry is intentional.
            _agentsDic.Remove(host);
            host.FinishTime = DateTime.Now;
            DataLayer.Save(host);

            if (_agentsDic.Count == 0)
            {
                // Last host overall: finalize the crawl and notify listeners.
                _crawlSetting.FinishTime = DateTime.Now;
                DataLayer.Save(_crawlSetting);
                OnCrawlCompleted(this);
            }
        }
    }
}
/// <summary>
/// Kicks off the crawl. Records the crawl start time, saves the setting, and
/// for every configured host creates the shared crawling resources and starts
/// <c>_threadsNumber</c> crawler agents against them.
/// </summary>
public void Crawl()
{
    _crawlSetting.StartTime = DateTime.Now;
    DataLayer.Save(_crawlSetting);

    for (int hostIndex = 0; hostIndex < _hosts.Length; hostIndex++)
    {
        Host currentHost = _hosts[hostIndex];

        // Per-host resources shared by all of that host's agents.
        Queue<Webpage> pendingPages = new Queue<Webpage>();
        object syncRoot = new object();
        HashSet<string> seenPages = new HashSet<string>();
        HashSet<string> seenForms = new HashSet<string>();
        CrawlingSharedResource resource = new CrawlingSharedResource(
            _crawlSetting, currentHost, pendingPages, syncRoot, seenPages, seenForms);

        CrawlerAgent[] agentPool = new CrawlerAgent[_threadsNumber];
        _aliveAgentsDic.Add(currentHost, 0);
        _agentsDic.Add(currentHost, agentPool);

        currentHost.StartTime = DateTime.Now;
        currentHost.CrawlId = _crawlSetting.Id;
        DataLayer.Save(currentHost);

        // Wire up event handlers, then start each agent asynchronously.
        for (int agentIndex = 0; agentIndex < _threadsNumber; agentIndex++)
        {
            agentPool[agentIndex] = new CrawlerAgent(resource);
            agentPool[agentIndex].CrawlAgentCompleted += agent_CrawlAgentCompleted;
            agentPool[agentIndex].CrawlAnnounced += agent_CrawlAnnounced;
            agentPool[agentIndex].CrawlAgentStarted += Crawler_CrawlAgentStarted;
            agentPool[agentIndex].CrawlAsync();
        }
    }
}