public void should_return_failure_if_uri_is_wrong_does_not_exists()
            {
                // Arrange: the first constructor argument is deliberately not a usable URI.
                var crawlerParameters = new WebCrawlerParameters("so what?", _testFolder, 0, false);

                // Act
                var validation = crawlerParameters.Validate();

                // Assert: validation is rejected and at least one message is reported.
                Assert.That(validation.Valid, Is.False);
                Assert.That(validation.Messages.Any(), Is.True);
            }
            public void should_return_failure_if_test_folder_does_not_exists()
            {
                // Build the parameters with a folder path that is expected to be missing
                // and validate them in one step.
                var outcome = new WebCrawlerParameters("http://www.ya.ru", "Z:\\1\\2\\3\\4", 0, false)
                    .Validate();

                // Validation must fail and explain itself with at least one message.
                Assert.That(outcome.Valid, Is.False);
                Assert.That(outcome.Messages.Any(), Is.True);
            }
            public void should_return_success_if_parameters_are_correct()
            {
                // Arrange: same URI as the folder test, combined with the fixture's test folder.
                var validParameters = new WebCrawlerParameters("http://www.ya.ru", _testFolder, 0, false);

                // Act
                var verdict = validParameters.Validate();

                // Assert: validation passes and produces no messages.
                Assert.That(verdict.Valid, Is.True);
                Assert.That(verdict.Messages.Any(), Is.False);
            }
Пример #4
0
        /// <summary>
        /// Validates the crawl parameters and, when they are valid, passes a seeded
        /// resource stack to the worker selected by <paramref name="type"/>.
        /// </summary>
        /// <param name="parameters">Crawl settings; validated before any work starts.</param>
        /// <param name="type">Which worker implementation to request from the worker factory.</param>
        /// <returns>
        /// The worker's result, or a result with <c>Success = false</c> and the validation
        /// messages joined by new lines when validation fails.
        /// </returns>
        public WebCrawlerResult DownloadWebSite(WebCrawlerParameters parameters, WebCrawlerWorkerType type = WebCrawlerWorkerType.Sync)
        {
            // NMB: revisit how the result is best returned here - map it, or something else.
            var validation = parameters.Validate();

            if (validation.Valid == false)
            {
                // Report every validation message, one per line.
                var failureText = String.Join(Environment.NewLine, validation.Messages);
                return new WebCrawlerResult() { Success = false, Message = failureText };
            }

            var seededStack = CreateResourceStackWithInitialResource(parameters);

            IWorkerFactory factory = new WorkerFactory();
            IWebCrawlerWorker selectedWorker = factory.GetWorker(type);

            return selectedWorker.DonwlodWebSite(seededStack);
        }
Пример #5
0
        /// <summary>
        /// Crawls a fixed site into <paramref name="folderPath"/> using the given worker
        /// type, cleans the folder afterwards and prints the elapsed download time.
        /// </summary>
        /// <param name="folderPath">Folder to create (if needed) and download into.</param>
        /// <param name="type">Worker implementation to run the crawl with.</param>
        private static void Measure(string folderPath, WebCrawlerWorkerType type)
        {
            // Create the target folder up front.
            Directory.CreateDirectory(folderPath);

            var crawlSettings = new WebCrawlerParameters("http://nezhnova.de", folderPath, 1, true);
            IWebCrawler crawler = new WebCrawler();

            Console.WriteLine("Start crawling. " + folderPath);

            // Only the download itself is timed; cleanup runs after the stopwatch has stopped.
            var timer = Stopwatch.StartNew();
            crawler.DownloadWebSite(crawlSettings, type);
            timer.Stop();

            FileHelpers.CleanDirectory(folderPath);

            Console.WriteLine("{0} .Elapsed={1}", folderPath, timer.Elapsed);
        }
Пример #6
0
        /// <summary>
        /// Creates a new resource stack, wires up a resource factory around it and queues
        /// the model that the factory returns for the starting URI.
        /// </summary>
        /// <param name="parameters">
        /// Crawl settings; this method reads <c>PathToFolder</c>, <c>InitialHostUri</c>,
        /// <c>LoadOnlySelectedDomainPages</c>, <c>Uri</c> and <c>MaxDeep</c>.
        /// </param>
        /// <returns>The stack with the initial resource already queued.</returns>
        private ResourcesStack CreateResourceStackWithInitialResource(WebCrawlerParameters parameters)
        {
            // Two location managers rooted at the same folder: a general one and a page-specific one.
            var unprocessableLocations = new ResourceLocationManager(parameters.PathToFolder);
            var pageLocations = new PageResourceLocationManager(parameters.PathToFolder);

            var stack = new ResourcesStack();

            // Collaborators are created in the same order as the factory's argument list.
            var loader = new ResourceLoader();
            var saver = new FileSaver();
            var unprocessablePaths = new RelativePathProducer(unprocessableLocations);
            var pagePaths = new RelativePathProducer(pageLocations);

            // The factory receives the stack itself as its last argument.
            IResourceFactory resourceFactory = new ResourceFactory(
                loader,
                saver,
                unprocessableLocations,
                pageLocations,
                unprocessablePaths,
                pagePaths,
                parameters.InitialHostUri,
                parameters.LoadOnlySelectedDomainPages,
                stack);

            // Seed the queue with the model for the starting URI.
            stack.AddResourceToQueue(resourceFactory.GetHtmlModel(parameters.Uri, parameters.MaxDeep));

            return stack;
        }