/// <summary>
/// Groups a batch of taxonomic units by TSN, transforms each group into plant infos,
/// hands all successfully transformed plant infos to the processor, and logs one
/// information line per processed result.
/// </summary>
/// <param name="transformer">Transformer applied to each TSN group.</param>
/// <param name="processor">Processor that receives the combined plant infos of the batch.</param>
/// <param name="taxonomicUnits">The taxonomic units making up this batch.</param>
/// <returns>A task that completes once the batch has been processed and logged.</returns>
private async Task ProcessTaxonomicUnits(ITISPlantInfoTransformer transformer, PlantInfoProcessor processor, List<TaxonomicUnit> taxonomicUnits)
{
    var plantInfos = new List<PlantInfo>();

    foreach (var taxonomicUnit in taxonomicUnits.GroupBy(t => t.Tsn))
    {
        var species = taxonomicUnit.First();
        if (species == null)
        {
            continue;
        }

        try
        {
            var plantInfoResults = transformer.Transform(taxonomicUnit);
            plantInfos.AddRange(plantInfoResults);
        }
        catch (Exception ex)
        {
            // A failing group is logged and skipped so the rest of the batch still gets processed.
            // The exception goes first so it is attached to the log entry; passing it after the
            // message would treat it as a format argument and drop the stack trace.
            _logger.LogError(ex, "Unable to process {Tsn} {Species} {Message}", taxonomicUnit.Key, species, ex.Message);
        }
    }

    var plantInfosResult = await processor.Process(plantInfos);

    foreach (var plantInfoResult in plantInfosResult)
    {
        _logger.LogInformation($"CommonName: {plantInfoResult.CommonName} ScientificName: {plantInfoResult.ScientificName} PlantInfoId: {plantInfoResult.PlantInfoId}");
    }
}
/// <summary>
/// Transforms the ITIS sample data one taxonomic unit at a time, runs the transformed plant
/// infos through a <c>PlantInfoProcessor</c> built on mocked repositories and search indexes,
/// and checks the counts of the processed results.
/// </summary>
public async Task TestITISPlantInfoProcessor()
{
    // Arrange: transform every sample unit individually.
    var transformer = new ITISPlantInfoTransformer();
    var sampleUnits = ITISPlantInfoData();
    var transformed = new List<PlantInfo>();
    foreach (var unit in sampleUnits)
    {
        transformed.AddRange(transformer.Transform(new List<TaxonomicUnit> { unit }));
    }

    // Arrange: mocked repositories, all starting from empty stores.
    var originRepoMock = RepositoryMocks.GetStandardMockOriginRepository(new List<Stores.Origin>());
    var locationRepoMock = RepositoryMocks.GetStandardMockLocationRepository(new List<Stores.Location>());
    var lifeformRepoMock = RepositoryMocks.GetStandardMockLifeformRepository(new List<Stores.Lifeform>());
    var plantInfoRepoMock = RepositoryMocks.GetStandardMockPlantInfoRepository(new List<Stores.PlantInfo>());
    var plantLocationRepoMock = RepositoryMocks.GetStandardMockPlantLocationRepository(new List<Stores.PlantLocation>());
    var synonymRepoMock = RepositoryMocks.GetStandardMockSynonymRepository();
    var taxonRepoMock = RepositoryMocks.GetStandardMockTaxonRepository(new List<Stores.Taxon>());
    // Created but not handed to any of the services below.
    var plantSynonymRepoMock = new Mock<IRepository<Stores.PlantSynonym>>();

    // Arrange: mocked search indexes.
    var plantInfoIndexMock = SearchMocks.GetStandardMockPlantInfoIndex();
    var lifeformIndexMock = SearchMocks.GetStandardMockLifeformIndex();

    // Arrange: real services on top of the mocks.
    var locationService = new LocationService(locationRepoMock.Object);
    var originService = new OriginService(originRepoMock.Object, locationService);
    var lifeformService = new LifeformService(lifeformRepoMock.Object, lifeformIndexMock.Object);
    var plantInfoService = new PlantInfoService(plantInfoRepoMock.Object, plantLocationRepoMock.Object, plantInfoIndexMock.Object);
    var synonymService = new SynonymService(synonymRepoMock.Object);
    var taxonService = new TaxonService(taxonRepoMock.Object, synonymService);

    var processor = new PlantInfoProcessor(lifeformService, originService, plantInfoService, taxonService, locationService);
    await processor.InitializeOrigin(transformer.Origin);
    await processor.InitializeLifeforms();
    await processor.InitializeTaxons();

    // Act
    var result = await processor.Process(transformed);

    // Assert: names and taxon ranks.
    result.Count(p => p.ScientificName == "Glandularia quadrangulata").Should().Be(1);
    result.Count(p => p.Taxon.Subfamily == null).Should().Be(5);
    result.Count(p => p.Taxon.Species == "cremersii").Should().Be(1);
    result.Count(p => p.Taxon.Form == "viridifolia").Should().Be(1);
    result.Count(p => p.Taxon.Subspecies == "purpurea").Should().Be(1);
    result.Count(p => p.Taxon.Variety == "graminea").Should().Be(1);

    // Assert: locations (only plant infos that actually carry a location list) and origins.
    var locations = result.Where(p => p.Locations != null).SelectMany(p => p.Locations);
    locations.Count().Should().Be(3);
    locations.Count(l => l.Status == LocationStatus.Native).Should().Be(3);
    result.Select(p => p.Origin).DistinctBy(o => o.OriginId).Count().Should().Be(5);
}
/// <summary>
/// Transforms the ITIS sample data one taxonomic unit at a time with
/// <c>ITISPlantInfoTransformer</c> and checks the counts of the resulting plant infos.
/// </summary>
public void TestITISPlantInfoTransformer()
{
    // Arrange / Act: each sample unit is transformed on its own and the outputs are pooled.
    var transformer = new ITISPlantInfoTransformer();
    var sampleUnits = ITISPlantInfoData();
    var result = new List<PlantInfo>();
    foreach (var unit in sampleUnits)
    {
        var singleUnit = new List<TaxonomicUnit> { unit };
        result.AddRange(transformer.Transform(singleUnit));
    }

    // Assert: names and taxon ranks.
    result.Count(p => p.ScientificName == "Glandularia quadrangulata").Should().Be(1);
    result.Count(p => p.Taxon.Subfamily == null).Should().Be(6);
    result.Count(p => p.Taxon.Species == "cremersii").Should().Be(2);
    result.Count(p => p.Taxon.Form == "viridifolia").Should().Be(2);
    result.Count(p => p.Taxon.Subspecies == "purpurea").Should().Be(1);
    result.Count(p => p.Taxon.Variety == "graminea").Should().Be(1);

    // Assert: locations and origins.
    var nonNullLocations = result.Where(p => p.Locations != null).SelectMany(p => p.Locations);
    nonNullLocations.Count().Should().Be(3);
    // This check flattens Locations without a null filter, exactly as the other assertions require.
    result.SelectMany(p => p.Locations).DistinctBy(l => l.Location.Region).Count().Should().Be(1);
    result.Select(p => p.Origin).Count().Should().Be(6);
}
/// <summary>
/// Runs every active importer returned by <c>LoadImporters(Configuration)</c>, dispatching on
/// the importer's <c>Type</c> / <c>ImportModel</c> combination:
/// text and SQL importers stream rows and process them in batches, EF importers page through
/// an Elastic processor, and the JSON importer transforms a plants file and processes it in batches.
/// </summary>
/// <param name="args">
/// Command-line arguments. When non-empty, <c>args[0]</c> is used as the data directory;
/// otherwise <c>Configuration["dataDirectory"]</c> is used.
/// </param>
/// <returns>A task that completes when all active importers have run.</returns>
/// <exception cref="Exception">Thrown when an EF-backed processor reports one or more failures.</exception>
public async Task Run(string[] args)
{
    var importers = LoadImporters(Configuration);
    string dataDirectory = args.Length != 0 ? args[0] : Configuration["dataDirectory"];

    foreach (var importer in importers.Where(i => i.IsActive))
    {
        if (importer.Type == ImporterType.TextImporter && importer.ImportModel == "TaxonomicUnit")
        {
            var processor = _importTransformOrchestrator.GetPlantInfoProcessor;
            var transformer = new USDATransformer();
            var startRow = 1;
            var batchSize = 100;

            await processor.InitializeOrigin(transformer.Origin);
            await processor.InitializeLifeforms();
            await processor.InitializeTaxons();

            var dataFile = FileHelpers.GetDatafileName(importer.Filename, dataDirectory);
            var textImporter = new TextImporter<Checklist>(dataFile, importer.HasHeaders);
            var row = 1;
            var checklists = new List<Checklist>();

            await foreach (var result in textImporter.Import())
            {
                row++;
                if (row < startRow)
                {
                    continue;
                }

                // Every row is buffered; previously the row that landed on a batch boundary
                // triggered the flush but was itself never added, so it was silently dropped.
                checklists.Add(result);
                if (checklists.Count >= batchSize)
                {
                    await ProcessChecklists(transformer, processor, checklists);
                    checklists.Clear();
                }
            }

            // Flush the final partial batch once, after the stream ends (as the SQL branches do).
            if (checklists.Any())
            {
                await ProcessChecklists(transformer, processor, checklists);
                checklists.Clear();
            }
        }
        else if (importer.Type == ImporterType.SqlImporter && importer.ImportModel == "TaxonomicUnit")
        {
            var processor = _importTransformOrchestrator.GetPlantInfoProcessor;
            var sqlImporter = new SqlImporter<TaxonomicUnit>(importer.ConnectionString, importer.SqlQuery);
            var transformer = new ITISPlantInfoTransformer();
            var startRow = 0;
            var batchSize = 100;
            var row = 0;
            var taxonomicUnits = new List<TaxonomicUnit>();

            await processor.InitializeOrigin(transformer.Origin);
            await processor.InitializeLifeforms();
            await processor.InitializeTaxons();

            await foreach (var result in sqlImporter.Import())
            {
                row++;
                if (row < startRow)
                {
                    continue;
                }

                // Buffer first, then flush on a full batch, so no row is lost at a batch boundary.
                taxonomicUnits.Add(result);
                if (taxonomicUnits.Count >= batchSize)
                {
                    await ProcessTaxonomicUnits(transformer, processor, taxonomicUnits);
                    taxonomicUnits.Clear();
                }
            }

            if (taxonomicUnits.Any())
            {
                await ProcessTaxonomicUnits(transformer, processor, taxonomicUnits);
                taxonomicUnits.Clear();
            }
        }
        else if (importer.Type == ImporterType.SqlImporter && importer.ImportModel == "Vernacular")
        {
            var processor = _importTransformOrchestrator.GetSynonymProcessor;
            var sqlImporter = new SqlImporter<Vernacular>(importer.ConnectionString, importer.SqlQuery);
            var transformer = new ITISSynonymTransformer();
            var startRow = 0;
            var batchSize = 500;
            var row = 0;
            var vernaculars = new List<Vernacular>();

            await processor.InitializeOrigin(transformer.Origin);
            await processor.InitializeTaxons();

            await foreach (var result in sqlImporter.Import())
            {
                row++;
                if (row < startRow)
                {
                    continue;
                }

                // Buffer first, then flush on a full batch, so no row is lost at a batch boundary.
                vernaculars.Add(result);
                if (vernaculars.Count >= batchSize)
                {
                    await ProcessSynonyms(transformer, processor, vernaculars);
                    vernaculars.Clear();
                }
            }

            if (vernaculars.Any())
            {
                await ProcessSynonyms(transformer, processor, vernaculars);
                vernaculars.Clear();
            }
        }
        else if (importer.Type == ImporterType.EFImporter && importer.ImportModel == "PlantInfo")
        {
            var processor = _importTransformOrchestrator.GetElasticPlantInfoProcessor;
            const int pageSize = 1000;
            var finished = false;
            var counter = 0;
            var counterEnd = 40000;

            // Page through the processor until a page yields no successes or the cap is reached.
            while (!finished && counter < counterEnd)
            {
                // NOTE(review): this branch passes (offset, count) to ProcessSome, whereas the
                // Specimen branch passes (start, end) to Process — confirm both are intended.
                var response = await processor.ProcessSome(counter, pageSize);
                Console.WriteLine($"Successes: {response.Successes} / Failures: {response.Failures}");

                if (response.Failures > 0)
                {
                    throw new Exception("Errors! Noooooo!");
                }
                else if (response.Successes == 0)
                {
                    finished = true;
                }

                counter += pageSize;
            }
        }
        else if (importer.Type == ImporterType.EFImporter && importer.ImportModel == "Specimen")
        {
            var processor = _importTransformOrchestrator.GetElasticSpecimenProcessor;
            const int pageSize = 1000;
            var finished = false;
            var counter = 0;
            var counterEnd = 1000000;

            // Page through the processor until a page yields no successes or the cap is reached.
            while (!finished && counter < counterEnd)
            {
                var response = await processor.Process(counter, counter + pageSize);
                Console.WriteLine($"Successes: {response.Successes} / Failures: {response.Failures}");

                if (response.Failures > 0)
                {
                    throw new Exception("Errors! Noooooo!");
                }
                else if (response.Successes == 0)
                {
                    finished = true;
                }

                counter += pageSize;
            }
        }
        else if (importer.Type == ImporterType.JsonImporter && importer.ImportModel == "PlantsFile")
        {
            var transformer = new NatureServeTransformer();
            var dataFile = FileHelpers.GetDatafileName(importer.Filename, dataDirectory);
            var textImporter = new JsonImporter<IEnumerable<Plant>>(dataFile);
            // NOTE(review): despite the Async suffix the result is enumerated directly without
            // being awaited — confirm ImportObjectAsync returns a plain sequence.
            var plants = textImporter.ImportObjectAsync();
            var plantInfos = new List<PlantInfo>();

            foreach (var plant in plants)
            {
                plantInfos.Add(transformer.Transform(plant));
            }

            var processor = _importTransformOrchestrator.GetNatureServePlantInfoProcessor();
            await processor.InitializeOrigin(transformer.Origin);
            await processor.InitializeLifeforms();
            await processor.InitializeTaxons();
            await processor.InitializeOrigins();

            // Captured from the first processor so later processors can be built from them.
            List<Taxon> taxons = processor.Taxons;
            List<Lifeform> lifeforms = processor.Lifeforms;
            List<Origin> origins = processor.Origins;

            var batchSize = 500;
            var finished = false;
            var start = 0;
            var counter = start;
            var counterEnd = 40000;

            plantInfos = plantInfos.OrderBy(p => p.Origin.ExternalId).ToList();

            while (!finished && counter < counterEnd)
            {
                // Every batch after the first gets a fresh processor seeded with the captured lists.
                if (counter > start)
                {
                    processor = _importTransformOrchestrator.GetNatureServePlantInfoProcessor(lifeforms, taxons, origins);
                    await processor.InitializeOrigin(transformer.Origin);
                }

                // Materialized once: the batch is inspected, processed and read again for logging.
                IEnumerable<PlantInfo> batch = plantInfos.Skip(counter).Take(batchSize).ToList();
                if (!batch.Any())
                {
                    finished = true;
                    continue;
                }

                var result = await processor.Process(batch);
                Console.WriteLine($"Processed: {result.Count()} plantInfos - ended on {batch.Last().ScientificName}");
                counter += batchSize;
            }
        }
    }
}