/// <summary>
/// Validation must fail, and report at least one message, when the URI
/// argument is not a usable address (here the free text "so what?").
/// </summary>
public void should_return_failure_if_uri_is_wrong_does_not_exists()
{
    // Arrange: malformed URI, otherwise the same arguments as the success test.
    var crawlerParameters = new WebCrawlerParameters("so what?", _testFolder, 0, false);

    // Act
    var validation = crawlerParameters.Validate();

    // Assert
    Assert.That(validation.Valid, Is.False);
    Assert.That(validation.Messages.Any(), Is.True);
}
/// <summary>
/// Validation must fail, and report at least one message, when the target
/// folder path points to a location that is expected not to exist.
/// </summary>
public void should_return_failure_if_test_folder_does_not_exists()
{
    // Arrange: well-formed URI combined with a folder path assumed to be absent
    // on the machine running the test.
    var crawlerParameters = new WebCrawlerParameters("http://www.ya.ru", "Z:\\1\\2\\3\\4", 0, false);

    // Act
    var validation = crawlerParameters.Validate();

    // Assert
    Assert.That(validation.Valid, Is.False);
    Assert.That(validation.Messages.Any(), Is.True);
}
/// <summary>
/// Validation must succeed, and report no messages, when both the URI and
/// the target folder are acceptable.
/// </summary>
public void should_return_success_if_parameters_are_correct()
{
    // Arrange: well-formed URI and the fixture's test folder.
    var crawlerParameters = new WebCrawlerParameters("http://www.ya.ru", _testFolder, 0, false);

    // Act
    var validation = crawlerParameters.Validate();

    // Assert
    Assert.That(validation.Valid, Is.True);
    Assert.That(validation.Messages.Any(), Is.False);
}
/// <summary>
/// Validates <paramref name="parameters"/> and, when they are valid, hands a
/// resource stack seeded with the initial resource to a worker of the
/// requested <paramref name="type"/>.
/// </summary>
/// <param name="parameters">Crawl settings; validated before any work starts.</param>
/// <param name="type">Which worker implementation the factory should supply.</param>
/// <returns>
/// The worker's result, or a result with <c>Success = false</c> whose
/// <c>Message</c> is the validation messages joined by
/// <see cref="Environment.NewLine"/> when validation fails.
/// </returns>
public WebCrawlerResult DownloadWebSite(WebCrawlerParameters parameters, WebCrawlerWorkerType type = WebCrawlerWorkerType.Sync)
{
    // NMB: consider how best to return the result here - map it, or something else.
    var validation = parameters.Validate();
    if (validation.Valid)
    {
        var stack = CreateResourceStackWithInitialResource(parameters);

        IWorkerFactory factory = new WorkerFactory();
        IWebCrawlerWorker crawlerWorker = factory.GetWorker(type);
        return crawlerWorker.DonwlodWebSite(stack);
    }

    // Invalid parameters: report every validation message, one per line.
    return new WebCrawlerResult
    {
        Success = false,
        Message = string.Join(Environment.NewLine, validation.Messages)
    };
}
/// <summary>
/// Crawls a fixed site into <paramref name="folderPath"/> with the given
/// worker type and prints how long the download took.
/// </summary>
/// <param name="folderPath">Target folder; created if missing and cleaned afterwards.</param>
/// <param name="type">Worker implementation to measure.</param>
private static void Measure(string folderPath, WebCrawlerWorkerType type)
{
    Directory.CreateDirectory(folderPath);
    var parameters = new WebCrawlerParameters("http://nezhnova.de", folderPath, 1, true);
    IWebCrawler webCrawler = new WebCrawler();

    Console.WriteLine("Start crawling. " + folderPath);

    Stopwatch stopwatch = Stopwatch.StartNew();
    try
    {
        webCrawler.DownloadWebSite(parameters, type);

        // Stop before cleanup so directory cleaning is not included in the timing.
        stopwatch.Stop();
    }
    finally
    {
        // Clean up even when the crawl throws, so a failed run does not leave
        // downloaded files behind in the folder.
        FileHelpers.CleanDirectory(folderPath);
    }

    Console.WriteLine("{0} .Elapsed={1}", folderPath, stopwatch.Elapsed);
}
/// <summary>
/// Builds a new <c>ResourcesStack</c> and queues the initial HTML resource
/// described by <paramref name="parameters"/> on it.
/// </summary>
/// <param name="parameters">
/// Supplies the target folder, initial host URI, domain restriction flag,
/// start URI and maximum depth used to create the first resource.
/// </param>
/// <returns>The stack containing the queued initial resource.</returns>
private ResourcesStack CreateResourceStackWithInitialResource(WebCrawlerParameters parameters)
{
    // Two location managers over the same folder: one for resources that are
    // not processed as pages, one for pages.
    var plainLocations = new ResourceLocationManager(parameters.PathToFolder);
    var pageLocations = new PageResourceLocationManager(parameters.PathToFolder);

    var stack = new ResourcesStack();

    // Collaborators are created in the same order the factory receives them.
    var loader = new ResourceLoader();
    var saver = new FileSaver();
    var plainRelativePaths = new RelativePathProducer(plainLocations);
    var pageRelativePaths = new RelativePathProducer(pageLocations);

    IResourceFactory resourceFactory = new ResourceFactory(
        loader,
        saver,
        plainLocations,
        pageLocations,
        plainRelativePaths,
        pageRelativePaths,
        parameters.InitialHostUri,
        parameters.LoadOnlySelectedDomainPages,
        stack);

    stack.AddResourceToQueue(resourceFactory.GetHtmlModel(parameters.Uri, parameters.MaxDeep));
    return stack;
}