Exemple #1
0
        //-------------------------------------------//

        /// <summary>
        /// Initialize the url control for the given crawl session, creating the url queue and its locks.
        /// </summary>
        public UrlControl(CrawlSession session)
        {
            // remember the crawl session handed to this control
            _session = session;

            // create the two locks held by this control
            _loadingDomains = new Lock();
            _urlsLock = new Lock();

            // create the queue of urls
            _urls = new Queue<Url>();
        }
Exemple #2
0
        //-------------------------------------------//

        /// <summary>
        /// Initialize the address processing which retrieves the processed addresses.
        /// </summary>
        public ParseControl(CrawlSession session)
        {
            // Fail fast on a missing session: it is dereferenced below to subscribe to
            // the crawler collection events, which would otherwise surface as a
            // NullReferenceException from a partially initialized instance.
            if (session == null)
            {
                throw new System.ArgumentNullException("session");
            }

            _session = session;

            // caches of urls, keyed by the crawler they belong to
            _caches = new System.Collections.Generic.Dictionary <Crawler, CacheValue <Url> >();
            // NOTE(review): the meaning of 10000 is defined by the Stats type, not visible here
            Stats   = new Stats(10000);

            // subscribe to the session's crawler add / remove notifications
            _session.Crawlers.OnAdd    += OnAddCrawler;
            _session.Crawlers.OnRemove += OnRemoveCrawler;
        }
Exemple #3
0
        //-------------------------------------------//

        /// <summary>
        /// Initialize a host instance with the specified name and score.
        /// </summary>
        public Host(CrawlSession session, string name, int score, int count, bool isNew = false)
        {
            // remember the session passed in
            _session = session;

            // copy the remaining arguments into the instance
            Name = name;
            _score = score;
            _count = count;
            New = isNew;

            // separate collections for old and new urls
            _oldUrls = new ArrayRig<Url>();
            _newUrls = new ArrayRig<Url>();

            // a freshly constructed host starts out flagged as changed
            _changed = true;

            // lock and the reusable commit task
            _lock = new Lock();
            _commit = new Act(Commit);

            _scoreLog = true;
        }
Exemple #4
0
        //-------------------------------------------//

        /// <summary>
        /// Initialize a new Web Crawler.
        /// </summary>
        public Crawler(CrawlSession session)
        {
            // Validate up front: the session's settings are read further down, and
            // without this check a null session would only fail (NullReferenceException)
            // after the timer and task objects had already been created.
            if (session == null)
            {
                throw new System.ArgumentNullException("session");
            }

            _session = session;

            // NOTE(review): 3000 is presumably a timeout in milliseconds and 'true' a
            // start/repeat flag - confirm against the Timer type.
            _timer = new Timer(3000, OnTimeout, true);
            _time  = new Timekeeper();

            // init the lock
            _lock = new Lock();

            // create the common tasks once so they can be reused
            _stopped = new Act(OnStopped);
            _timeout = new Act(OnTimeout);

            _preConnect = new Act(PreConnect);
            _connect    = new Act(Connect);

            _preProcess  = new Act(PreProcess);
            _process     = new Act(Process);
            _postProcess = new Act(PostProcess);

            // cache common decoders
            _utf8  = Encoding.UTF8.GetDecoder();
            _ascii = Encoding.ASCII.GetDecoder();

            // read the configured buffer size once so both buffers are sized from the same value
            var byteBufferSize = _session.CrawlerByteBuffer;

            // byte buffer of the configured size
            _bytes = new byte[byteBufferSize];
            // char buffer large enough for the maximum number of chars UTF-8 decoding
            // of a full byte buffer can produce
            _chars = new char[Encoding.UTF8.GetMaxCharCount(byteBufferSize)];
            // start the urls collection
            _urls = new Queue <Url>();

            // init the cookie container
            // NOTE(review): the same max-count setting is passed as both the first and
            // second argument; if this is System.Net.CookieContainer those are the total
            // capacity and the per-domain capacity - confirm that is intended.
            Cookies = new CookieContainer(_session.CrawlerMaxCookieCount, _session.CrawlerMaxCookieCount, _session.CrawlerMaxCookieSize);
            // set a default url
            Url = Url.Empty;
        }