Example No. 1
        public void Crawl()
        {
            // Record when this crawl job started and persist the settings.
            _crawlSetting.StartTime = DateTime.Now;

            DataLayer.Save(_crawlSetting);

            for (int i = 0; i < _hosts.Length; i++)
            {
                // Resources shared by all agents crawling the same host:
                // a work queue of pages, a lock guarding it, and hash sets
                // for de-duplicating pages and forms already seen.
                Queue<Webpage> sharedQueue = new Queue<Webpage>();
                object sharedLock = new object();
                HashSet<string> sharedPageHash = new HashSet<string>();
                HashSet<string> sharedFormHash = new HashSet<string>();

                CrawlingSharedResource sharedResource = new CrawlingSharedResource(_crawlSetting, _hosts[i], sharedQueue, sharedLock, sharedPageHash, sharedFormHash);
                CrawlerAgent[] agents = new CrawlerAgent[_threadsNumber];

                // Track the agents per host and stamp the host with this crawl.
                _aliveAgentsDic.Add(_hosts[i], 0);
                _agentsDic.Add(_hosts[i], agents);
                _hosts[i].StartTime = DateTime.Now;
                _hosts[i].CrawlId = _crawlSetting.Id;

                DataLayer.Save(_hosts[i]);

                // Spin up one agent per thread; each reports progress and
                // completion back through events before crawling asynchronously.
                for (int j = 0; j < _threadsNumber; j++)
                {
                    agents[j] = new CrawlerAgent(sharedResource);
                    agents[j].CrawlAgentCompleted += agent_CrawlAgentCompleted;
                    agents[j].CrawlAnnounced += agent_CrawlAnnounced;
                    agents[j].CrawlAgentStarted += Crawler_CrawlAgentStarted;
                    agents[j].CrawlAsync();
                }
            }
        }
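
For context, here is a minimal sketch of what CrawlingSharedResource might look like, inferred purely from the constructor call in Crawl() above. The class itself is not shown in these examples, so the property names and the CrawlSetting/Host types are assumptions, not the actual implementation.

    // Hypothetical sketch of the shared-resource container, inferred from the
    // constructor call in Crawl(); the real class may differ.
    public class CrawlingSharedResource
    {
        public CrawlSetting CrawlSetting { get; }   // assumed type and name
        public Host Host { get; }                   // assumed type and name
        public Queue<Webpage> Queue { get; }
        public object Lock { get; }
        public HashSet<string> PageHash { get; }
        public HashSet<string> FormHash { get; }

        public CrawlingSharedResource(CrawlSetting crawlSetting, Host host,
            Queue<Webpage> queue, object queueLock,
            HashSet<string> pageHash, HashSet<string> formHash)
        {
            CrawlSetting = crawlSetting;
            Host = host;
            Queue = queue;
            Lock = queueLock;
            PageHash = pageHash;
            FormHash = formHash;
        }
    }

A plain container like this keeps the per-host queue, its lock, and the de-duplication sets in one object, so every agent for a host can be handed a single reference.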
Example No. 2
 /// <summary>
 /// Initializes a new crawler agent that works against the given shared resource.
 /// </summary>
 /// <param name="sharedResource">Queue, lock, and de-duplication sets shared by all agents crawling the same host</param>
 public CrawlerAgent(CrawlingSharedResource sharedResource)
 {
     _sharedResource = sharedResource;
 }
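
The examples never show CrawlAsync itself. The following is only a plausible sketch of the worker loop such an agent might run, assuming the shared queue is drained under the shared lock and the page hash set is used to avoid revisiting URLs; every member name below that does not appear in the examples above is hypothetical.

    // Hypothetical worker loop; CrawlAsync is not shown in the examples, so
    // this only illustrates one way the shared resources could be used.
    private void CrawlLoop()
    {
        while (true)
        {
            Webpage page;
            lock (_sharedResource.Lock)      // assumed property, see sketch above
            {
                if (_sharedResource.Queue.Count == 0)
                    break;                   // no more work queued for this host
                page = _sharedResource.Queue.Dequeue();
            }

            // ... download the page, extract links, then enqueue each link
            // whose URL has not been seen before, e.g.:
            // lock (_sharedResource.Lock)
            // {
            //     if (_sharedResource.PageHash.Add(url))   // Add returns false if already present
            //         _sharedResource.Queue.Enqueue(new Webpage(url));
            // }
        }
    }

Because Queue<T> and HashSet<string> are not thread-safe, every access in this sketch goes through the single shared lock, which matches the sharedLock object created per host in Crawl().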