/// <summary>
/// Returns the first entry of <c>Providers</c> whose runtime type name, with every
/// occurrence of "DataParser" removed, equals the string form of <paramref name="name"/>.
/// </summary>
/// <param name="name">Crawler whose data parser is requested.</param>
/// <returns>The matching parser instance.</returns>
/// <exception cref="ArgumentException">No entry of <c>Providers</c> matches <paramref name="name"/>.</exception>
public IDataParser GetProvider(CrawlerName name)
{
    // TODO: check general naming conventions in complete solution
    var match = Providers.FirstOrDefault(
        candidate => candidate.GetType().Name.Replace("DataParser", "") == name.ToString());

    if (match == null)
    {
        throw new ArgumentException($"DataProvider {name} was requested, but not registered. Please register the DataProvider.");
    }

    return match;
}
/// <summary>
/// Initializes the shared crawler state: stores the injected services, resolves the data
/// parser for <paramref name="name"/>, and creates empty work collections, an HTML parser
/// and a cancellation token source.
/// </summary>
/// <param name="searchResults">Assigned to <c>SearchResults</c>.</param>
/// <param name="name">Assigned to <c>Name</c>; also used to look up the data parser.</param>
/// <param name="scheduler">Assigned to <c>Scheduler</c>.</param>
/// <param name="projectService">Project service kept for later use.</param>
/// <param name="dataProviderFactory">Queried once via <c>GetProvider(name)</c>; any exception it throws propagates.</param>
/// <param name="database">Database handle kept for later use.</param>
/// <param name="logger">Logger kept for later use.</param>
protected CrawlerBase(
    IEnumerable<Uri> searchResults,
    CrawlerName name,
    SeleniumTaskScheduler scheduler,
    ProjectService projectService,
    DataParserProvider dataProviderFactory,
    Database.Database database,
    ILogger logger)
{
    // Injected collaborators.
    _projectService = projectService;
    _database = database;
    _logger = logger;

    // Resolved before any further state is allocated, so a failed lookup aborts construction early.
    _dataProvider = dataProviderFactory.GetProvider(name);

    // Per-instance working state, created empty.
    _foundProjects = new BlockingCollection<Uri>();
    _parsedProjects = new BlockingCollection<Uri>();
    HtmlParser = new HtmlParser();
    TokenSource = new CancellationTokenSource();

    // Values handed in by the caller.
    SearchResults = searchResults;
    Scheduler = scheduler;
    Name = name;
}
/// <summary>
/// Inserts <paramref name="project"/> into the collection named after
/// <paramref name="crawlerName"/>, or replaces the stored document that has the same
/// <c>ReferenceNumber</c> when <c>HasChanges</c> reports a difference.
/// </summary>
/// <param name="crawlerName">Its string form is used as the collection name.</param>
/// <param name="project">
/// Project to persist. It is mutated: <c>IsNew</c> is set to <c>true</c> whenever it is
/// written, and <c>Id</c> is copied from the stored document on replacement.
/// </param>
/// <returns>
/// <see cref="Task.CompletedTask"/>, wrapped by the async method.
/// NOTE(review): the nested <c>Task&lt;Task&gt;</c> return type looks unintentional, but it is
/// part of the signature callers see — confirm before changing.
/// </returns>
public async Task<Task> StoreProjects(CrawlerName crawlerName, Project project)
{
    var projects = _database.GetCollection<Project>(crawlerName.ToString());
    var sameReference = Builders<Project>.Filter.Eq(p => p.ReferenceNumber, project.ReferenceNumber);

    // SingleOrDefaultAsync yields null when nothing matches the filter.
    var existing = await projects.Find(sameReference).SingleOrDefaultAsync();

    if (existing != null)
    {
        // Already stored and unchanged: nothing to write.
        if (!HasChanges(existing, project))
        {
            return Task.CompletedTask;
        }

        // Keep the stored document's identity so the replacement overwrites it.
        project.Id = existing.Id;
        // NOTE(review): IsNew is also set on the update path — presumably "new or changed"; confirm.
        project.IsNew = true;
        await projects.ReplaceOneAsync(sameReference, project);
        return Task.CompletedTask;
    }

    // No stored document with this reference number: first insert.
    project.IsNew = true;
    await projects.InsertOneAsync(project);
    return Task.CompletedTask;
}
/// <summary>
/// Creates a report for the given crawler.
/// </summary>
/// <param name="name">Assigned to <c>Name</c>.</param>
public CrawlerReport(CrawlerName name)
{
    Name = name;
}