//-------------------------------------------//

/// <summary>
/// Create a url control bound to the given crawl session. The constructor
/// only stores the session and allocates an empty url queue together with
/// the two locks used by this instance; nothing is loaded here.
/// </summary>
/// <param name="session">The crawl session this control belongs to.</param>
public UrlControl(CrawlSession session)
{
    _session = session;

    // synchronization primitives: one for the url queue, one for domain loading
    _urlsLock = new Lock();
    _loadingDomains = new Lock();

    // pending urls waiting to be processed, initially empty
    _urls = new Queue <Url>();
}
//-------------------------------------------//

/// <summary>
/// Create a parse control bound to the given crawl session. Allocates the
/// per-crawler cache map and the statistics instance, then subscribes to the
/// session's crawler add and remove notifications.
/// </summary>
/// <param name="session">The crawl session whose crawlers are tracked.</param>
public ParseControl(CrawlSession session)
{
    _session = session;

    // statistics instance, constructed with 10000
    // NOTE(review): the meaning of this argument is defined by Stats - confirm
    Stats = new Stats(10000);

    // one cache entry per crawler, initially empty
    _caches = new System.Collections.Generic.Dictionary <Crawler, CacheValue <Url> >();

    // get notified whenever a crawler joins or leaves the session
    _session.Crawlers.OnAdd += OnAddCrawler;
    _session.Crawlers.OnRemove += OnRemoveCrawler;
}
//-------------------------------------------//

/// <summary>
/// Create a host with the given name, score and count. The host starts with
/// empty new-url and old-url collections and with its changed and score-log
/// flags set.
/// </summary>
/// <param name="session">The crawl session the host belongs to.</param>
/// <param name="name">Name assigned to the host.</param>
/// <param name="score">Initial score of the host.</param>
/// <param name="count">Initial count of the host.</param>
/// <param name="isNew">Value assigned to <c>New</c>; defaults to <c>false</c>.</param>
public Host(CrawlSession session, string name, int score, int count, bool isNew = false)
{
    // identity and initial values supplied by the caller
    _session = session;
    Name = name;
    _score = score;
    _count = count;
    New = isNew;

    // lock and the reusable commit action
    _lock = new Lock();
    _commit = new Act(Commit);

    // url collections, both initially empty
    _newUrls = new ArrayRig <Url>();
    _oldUrls = new ArrayRig <Url>();

    // a freshly constructed host is flagged as changed, with score logging on
    _changed = true;
    _scoreLog = true;
}
//-------------------------------------------//

/// <summary>
/// Create a web crawler bound to the given crawl session. Sets up the timer
/// and timekeeper, the lock, cached decoders, the byte and char buffers, the
/// url queue, the reusable pipeline actions and the cookie container, and
/// points the crawler at the empty url.
/// </summary>
/// <param name="session">
/// The crawl session providing the buffer size and cookie limits.
/// </param>
public Crawler(CrawlSession session)
{
    _session = session;

    // timer constructed with 3000, the timeout handler and 'true'
    // NOTE(review): presumably a 3000 ms timeout - confirm against Timer
    _timer = new Timer(3000, OnTimeout, true);
    _time = new Timekeeper();

    // synchronization for this crawler
    _lock = new Lock();

    // decoders are cached once instead of being requested per use
    _utf8 = Encoding.UTF8.GetDecoder();
    _ascii = Encoding.ASCII.GetDecoder();

    // raw byte buffer sized by the session setting
    _bytes = new byte[_session.CrawlerByteBuffer];

    // char buffer large enough for the UTF-8 decoding of a full byte buffer
    _chars = new char[Encoding.UTF8.GetMaxCharCount(_session.CrawlerByteBuffer)];

    // urls queued for this crawler, initially empty
    _urls = new Queue <Url>();

    // reusable actions for each stage of the crawl
    _stopped = new Act(OnStopped);
    _timeout = new Act(OnTimeout);
    _preConnect = new Act(PreConnect);
    _connect = new Act(Connect);
    _preProcess = new Act(PreProcess);
    _process = new Act(Process);
    _postProcess = new Act(PostProcess);

    // cookie container; the session's max cookie count is passed as both
    // the first and the second argument, the max cookie size as the third
    Cookies = new CookieContainer(
        _session.CrawlerMaxCookieCount,
        _session.CrawlerMaxCookieCount,
        _session.CrawlerMaxCookieSize);

    // start from the empty url
    Url = Url.Empty;
}