예제 #1
0
 /// <summary>
 /// Stores the supplied logger, services and resolved options, then creates the
 /// spider's requested queue and its empty supplier and data-flow lists.
 /// </summary>
 /// <param name="options">Options wrapper; its <c>Value</c> is assigned to <c>Options</c>.</param>
 /// <param name="services">Service bundle kept in <c>_services</c>.</param>
 /// <param name="logger">Logger assigned to <c>Logger</c>.</param>
 protected Spider(
     IOptions<SpiderOptions> options,
     DependenceServices services,
     ILogger<Spider> logger)
 {
     // Externally supplied collaborators.
     Logger = logger;
     _services = services;
     Options = options.Value;

     // Internal working state; the queue is built from the options resolved above,
     // so it must come after the Options assignment.
     _dataFlows = new List<IDataFlow>();
     _requestSuppliers = new List<IRequestSupplier>();
     _requestedQueue = new RequestedQueue(Options);
 }
예제 #2
0
        /// <summary>
        /// Hashes and enqueues requests for the URLs numbered 1 through 9999 into a single
        /// <c>RequestedQueue</c> from inside a <c>Parallel.For</c> loop.
        /// The method itself makes no assertions.
        /// </summary>
        public void ParallelEnqueue()
        {
            var hasher = new RequestHasher(new MurmurHashAlgorithmService());
            var requestedQueue = new RequestedQueue();
            var parallelOptions = new ParallelOptions();

            // Parallel.For treats the upper bound as exclusive, so indices run 1..9999.
            Parallel.For(1, 10000, parallelOptions, index =>
            {
                var url = $"http://www.baidu.com/{index}";
                var pending = new Request(url) { Timeout = 2000 };

                // The hash is computed before the request is handed to the queue.
                hasher.ComputeHash(pending);
                requestedQueue.Enqueue(pending);
            });
        }
예제 #3
0
        /// <summary>
        /// Stores the logger and resolved options, rejects a configured speed above 500,
        /// then stores the services and creates the requested queue and the empty
        /// supplier and data-flow lists.
        /// </summary>
        /// <param name="options">Options wrapper; its <c>Value</c> is assigned to <c>Options</c>.</param>
        /// <param name="services">Service bundle kept in <c>_services</c>.</param>
        /// <param name="logger">Logger assigned to <c>Logger</c>.</param>
        /// <exception cref="SpiderException">Thrown when <c>Options.Speed</c> is greater than 500.</exception>
        protected Spider(
            IOptions<SpiderOptions> options,
            DependenceServices services,
            ILogger<Spider> logger)
        {
            // Highest speed value the constructor accepts.
            const int maxSpeed = 500;

            Logger = logger;
            Options = options.Value;

            // Validate before any further state is built.
            if (Options.Speed > maxSpeed)
            {
                throw new SpiderException("Speed should not large than 500");
            }

            _services = services;
            _dataFlows = new List<IDataFlow>();
            _requestSuppliers = new List<IRequestSupplier>();
            _requestedQueue = new RequestedQueue();
        }
예제 #4
0
        /// <summary>
        /// Enqueues a single hashed request, waits one second, dequeues it by hash and
        /// checks that the dequeued request equals the one that was enqueued.
        /// </summary>
        public void Dequeue()
        {
            var hasher = new RequestHasher(new MurmurHashAlgorithmService());
            var requestedQueue = new RequestedQueue();
            var original = new Request("http://www.baidu.com") { Timeout = 2000 };

            hasher.ComputeHash(original);
            requestedQueue.Enqueue(original);
            Assert.Equal(1, requestedQueue.Count);

            // NOTE(review): presumably Timeout is in milliseconds, making this 1000 ms wait
            // shorter than the request's timeout - confirm against Request.
            Thread.Sleep(1000);

            var dequeued = requestedQueue.Dequeue(original.Hash);
            Assert.NotNull(dequeued);
            Assert.Equal(original, dequeued);
            Assert.Equal(original.Hash, dequeued.Hash);
        }
예제 #5
0
        /// <summary>
        /// Enqueues a single hashed request, waits 2.5 seconds, then checks that dequeuing
        /// by hash yields null and that the queue's timeout list holds exactly that request.
        /// </summary>
        public void DequeueTimeout()
        {
            var hasher = new RequestHasher(new MurmurHashAlgorithmService());
            var requestedQueue = new RequestedQueue();
            var original = new Request("http://www.baidu.com") { Timeout = 2000 };

            hasher.ComputeHash(original);
            requestedQueue.Enqueue(original);
            Assert.Equal(1, requestedQueue.Count);

            // NOTE(review): presumably Timeout is in milliseconds, making this 2500 ms wait
            // longer than the request's timeout - confirm against Request.
            Thread.Sleep(2500);

            var dequeued = requestedQueue.Dequeue(original.Hash);
            Assert.Null(dequeued);

            var timedOut = requestedQueue.GetAllTimeoutList();
            Assert.Single(timedOut);
            Assert.Equal(original.Hash, timedOut[0].Hash);
        }
예제 #6
0
파일: Spider.cs 프로젝트: zzti/DotnetSpider
        /// <summary>
        /// Stores the logger and resolved options, rejects a configured speed above 500,
        /// creates the requested queue and the empty supplier and data-flow lists, and
        /// resolves the default downloader name from the host builder properties.
        /// </summary>
        /// <param name="options">Options wrapper; its <c>Value</c> is assigned to <c>Options</c>.</param>
        /// <param name="services">Service bundle; its <c>HostBuilderContext.Properties</c> is read for the default downloader.</param>
        /// <param name="logger">Logger assigned to <c>Logger</c>.</param>
        /// <exception cref="SpiderException">Thrown when <c>Options.Speed</c> is greater than 500.</exception>
        protected Spider(IOptions <SpiderOptions> options,
                         DependenceServices services,
                         ILogger <Spider> logger
                         )
        {
            Logger  = logger;
            Options = options.Value;

            if (Options.Speed > 500)
            {
                throw new SpiderException("Speed should not large than 500");
            }

            _services         = services;
            _requestedQueue   = new RequestedQueue();
            _requestSuppliers = new List <IRequestSupplier>();
            _dataFlows        = new List <IDataFlow>();

            // Use the downloader registered under Const.DefaultDownloader when the key is
            // present (a null value yields a null downloader name, as before); otherwise
            // fall back to the HttpClient downloader. A single TryGetValue avoids looking
            // the key up twice with ContainsKey followed by the indexer.
            _defaultDownloader =
                _services.HostBuilderContext.Properties.TryGetValue(Const.DefaultDownloader, out var configuredDownloader)
                    ? configuredDownloader?.ToString()
                    : Const.Downloader.HttpClient;
        }
예제 #7
0
        /// <summary>
        /// Stores the logger and resolved options, assigns a new GUID-based <c>Id</c>,
        /// rejects a configured speed above 500, creates the data-flow list and requested
        /// queue, and resolves the default downloader name from the host builder properties.
        /// </summary>
        /// <param name="options">Options wrapper; its <c>Value</c> is assigned to <c>Options</c>.</param>
        /// <param name="services">Service bundle; its <c>HostBuilderContext.Properties</c> is read for the default downloader.</param>
        /// <param name="logger">Logger assigned to <c>Logger</c>.</param>
        /// <exception cref="ArgumentException">Thrown when <c>Options.Speed</c> is greater than 500.</exception>
        public SpiderFactory(IOptions <SpiderOptions> options,
                             DependenceServices services,
                             ILogger <SpiderFactory> logger
                             )
        {
            Logger  = logger;
            Options = options.Value;
            Id      = Guid.NewGuid().ToString("N");

            if (Options.Speed > 500)
            {
                throw new ArgumentException("Speed should not large than 500");
            }

            _services       = services;
            _dataFlows      = new List <IDataFlow>();
            _requestedQueue = new RequestedQueue();

            // Use the downloader registered under Const.DefaultDownloader when the key is
            // present (a null value yields a null downloader name, as before); otherwise
            // fall back to the HttpClient downloader. A single TryGetValue avoids looking
            // the key up twice with ContainsKey followed by the indexer.
            _defaultDownloader =
                _services.HostBuilderContext.Properties.TryGetValue(Const.DefaultDownloader, out var configuredDownloader)
                    ? configuredDownloader?.ToString()
                    : Const.Downloader.HttpClient;
        }
예제 #8
0
        /// <summary>
        /// Sequentially enqueues 10000 hashed requests, then dequeues each one by hash from
        /// inside a <c>Parallel.ForEach</c> loop and checks that every dequeue returns a
        /// non-null request.
        /// </summary>
        public void ParallelDequeue()
        {
            var hasher = new RequestHasher(new MurmurHashAlgorithmService());
            var requestedQueue = new RequestedQueue();
            var enqueuedHashes = new List<string>();

            // Fill the queue on one thread, remembering each hash for the parallel phase.
            for (var index = 0; index < 10000; index++)
            {
                var url = $"http://www.baidu.com/{index}";
                var pending = new Request(url) { Timeout = 30000 };

                hasher.ComputeHash(pending);
                enqueuedHashes.Add(pending.Hash);
                requestedQueue.Enqueue(pending);
            }

            Parallel.ForEach(enqueuedHashes, new ParallelOptions(), hash =>
            {
                var dequeued = requestedQueue.Dequeue(hash);
                Assert.NotNull(dequeued);
            });
        }
예제 #9
0
        /// <summary>
        /// Checks the queue's count after enqueuing one request, after enqueuing that same
        /// request a second time (expected to stay at 1), and after enqueuing a second
        /// request with a different URL (expected to reach 2).
        /// </summary>
        public void Enqueue()
        {
            var hasher = new RequestHasher(new MurmurHashAlgorithmService());
            var requestedQueue = new RequestedQueue();

            var first = new Request("http://www.baidu.com") { Timeout = 2000 };
            hasher.ComputeHash(first);

            requestedQueue.Enqueue(first);
            Assert.Equal(1, requestedQueue.Count);

            // Enqueuing the very same request again is expected to leave the count unchanged.
            requestedQueue.Enqueue(first);
            Assert.Equal(1, requestedQueue.Count);

            var second = new Request("http://www.baidu.com/2") { Timeout = 2000 };
            hasher.ComputeHash(second);

            requestedQueue.Enqueue(second);
            Assert.Equal(2, requestedQueue.Count);
        }