Esempio n. 1
0
        /// <summary>
        /// Transforms a batch of taxonomic units into plant infos, hands them to the processor
        /// and logs one line per processed result.
        /// </summary>
        /// <param name="transformer">Transformer applied to each group of units sharing a TSN.</param>
        /// <param name="processor">Processor that receives all successfully transformed plant infos.</param>
        /// <param name="taxonomicUnits">The batch of units to transform.</param>
        private async Task ProcessTaxonomicUnits(ITISPlantInfoTransformer transformer, PlantInfoProcessor processor, List <TaxonomicUnit> taxonomicUnits)
        {
            var plantInfos = new List <PlantInfo>();

            // Units that share a TSN are transformed together as one group.
            foreach (var taxonomicUnit in taxonomicUnits.GroupBy(t => t.Tsn))
            {
                try
                {
                    plantInfos.AddRange(transformer.Transform(taxonomicUnit));
                }
                catch (Exception ex)
                {
                    // A failing group is logged and skipped so the rest of the batch still gets processed.
                    // A grouping always holds at least one element, so First() cannot throw here.
                    // The exception goes first so the logger records it (with stack trace) instead of
                    // treating it as a format argument; the template keeps data out of the format string.
                    _logger.LogError(ex, "Unable to process {Tsn} {Species} {Message}", taxonomicUnit.Key, taxonomicUnit.First(), ex.Message);
                }
            }

            var plantInfosResult = await processor.Process(plantInfos);

            foreach (var plantInfoResult in plantInfosResult)
            {
                _logger.LogInformation("CommonName: {CommonName} ScientificName: {ScientificName} PlantInfoId: {PlantInfoId}",
                                       plantInfoResult.CommonName, plantInfoResult.ScientificName, plantInfoResult.PlantInfoId);
            }
        }
Esempio n. 2
0
        public async Task TestITISPlantInfoProcessor()
        {
            // Arrange: transform every ITIS fixture row on its own and pool the resulting plant infos.
            var itisTransformer = new ITISPlantInfoTransformer();
            var fixtureUnits    = ITISPlantInfoData();
            var transformed     = new List <PlantInfo>();

            foreach (var unit in fixtureUnits)
            {
                var singleUnit = new List <TaxonomicUnit> { unit };
                transformed.AddRange(itisTransformer.Transform(singleUnit));
            }

            // Mock repositories and search indexes backing the services under test.
            var originRepoMock        = RepositoryMocks.GetStandardMockOriginRepository(new List <Stores.Origin>());
            var locationRepoMock      = RepositoryMocks.GetStandardMockLocationRepository(new List <Stores.Location>());
            var lifeformRepoMock      = RepositoryMocks.GetStandardMockLifeformRepository(new List <Stores.Lifeform>());
            var plantInfoRepoMock     = RepositoryMocks.GetStandardMockPlantInfoRepository(new List <Stores.PlantInfo>());
            var plantLocationRepoMock = RepositoryMocks.GetStandardMockPlantLocationRepository(new List <Stores.PlantLocation>());
            var synonymRepoMock       = RepositoryMocks.GetStandardMockSynonymRepository();
            var taxonRepoMock         = RepositoryMocks.GetStandardMockTaxonRepository(new List <Stores.Taxon>());
            var plantSynonymRepoMock  = new Mock <IRepository <Stores.PlantSynonym> >();
            var plantInfoIndexMock    = SearchMocks.GetStandardMockPlantInfoIndex();
            var lifeformIndexMock     = SearchMocks.GetStandardMockLifeformIndex();

            // Real services wired on top of the mocks.
            var locations  = new LocationService(locationRepoMock.Object);
            var origins    = new OriginService(originRepoMock.Object, locations);
            var lifeforms  = new LifeformService(lifeformRepoMock.Object, lifeformIndexMock.Object);
            var plantInfos = new PlantInfoService(plantInfoRepoMock.Object, plantLocationRepoMock.Object, plantInfoIndexMock.Object);
            var synonyms   = new SynonymService(synonymRepoMock.Object);
            var taxons     = new TaxonService(taxonRepoMock.Object, synonyms);

            var plantInfoProcessor = new PlantInfoProcessor(lifeforms, origins, plantInfos, taxons, locations);

            await plantInfoProcessor.InitializeOrigin(itisTransformer.Origin);
            await plantInfoProcessor.InitializeLifeforms();
            await plantInfoProcessor.InitializeTaxons();

            // Act
            var result = await plantInfoProcessor.Process(transformed);

            // Assert
            result.Count(p => p.ScientificName == "Glandularia quadrangulata").Should().Be(1);
            result.Count(p => p.Taxon.Subfamily == null).Should().Be(5);
            result.Count(p => p.Taxon.Species == "cremersii").Should().Be(1);
            result.Count(p => p.Taxon.Form == "viridifolia").Should().Be(1);
            result.Count(p => p.Taxon.Subspecies == "purpurea").Should().Be(1);
            result.Count(p => p.Taxon.Variety == "graminea").Should().Be(1);

            // Deliberately left deferred: the query is re-evaluated for each assertion below.
            var processedLocations = result.Where(p => p.Locations != null).SelectMany(p => p.Locations);
            processedLocations.Count().Should().Be(3);
            processedLocations.Count(l => l.Status == LocationStatus.Native).Should().Be(3);

            result.Select(p => p.Origin).DistinctBy(o => o.OriginId).Count().Should().Be(5);
        }
Esempio n. 3
0
        public void TestITISPlantInfoTransformer()
        {
            // Arrange / Act: transform each fixture row separately and pool the output.
            var itisTransformer = new ITISPlantInfoTransformer();
            var fixtureUnits    = ITISPlantInfoData();
            var transformed     = new List <PlantInfo>();

            foreach (var unit in fixtureUnits)
            {
                var singleUnit = new List <TaxonomicUnit> { unit };
                transformed.AddRange(itisTransformer.Transform(singleUnit));
            }

            // Assert: taxon fields.
            transformed.Count(p => p.ScientificName == "Glandularia quadrangulata").Should().Be(1);
            transformed.Count(p => p.Taxon.Subfamily == null).Should().Be(6);
            transformed.Count(p => p.Taxon.Species == "cremersii").Should().Be(2);
            transformed.Count(p => p.Taxon.Form == "viridifolia").Should().Be(2);
            transformed.Count(p => p.Taxon.Subspecies == "purpurea").Should().Be(1);
            transformed.Count(p => p.Taxon.Variety == "graminea").Should().Be(1);

            // Assert: locations and origins.
            var locationCount = transformed.Where(p => p.Locations != null).SelectMany(p => p.Locations).Count();
            locationCount.Should().Be(3);

            var regionCount = transformed.SelectMany(p => p.Locations).DistinctBy(l => l.Location.Region).Count();
            regionCount.Should().Be(1);

            var originCount = transformed.Select(p => p.Origin).Count();
            originCount.Should().Be(6);
        }
Esempio n. 4
0
        /// <summary>
        /// Runs every active importer returned by <c>LoadImporters(Configuration)</c>, dispatching on the
        /// importer's type and import model. Text and SQL importers stream rows and hand them to the matching
        /// processor in fixed-size batches; EF importers page through the processor until it reports no more
        /// successes; the JSON importer transforms the whole file and processes it in sorted batches.
        /// </summary>
        /// <param name="args">
        /// Command-line arguments. When non-empty, the first element is used as the data directory;
        /// otherwise <c>Configuration["dataDirectory"]</c> is used.
        /// </param>
        /// <exception cref="InvalidOperationException">An EF importer page reported one or more failures.</exception>
        public async Task Run(string[] args)
        {
            var    importers     = LoadImporters(Configuration);
            string dataDirectory = args != null && args.Length != 0
                ? args[0]
                : Configuration["dataDirectory"];

            foreach (var importer in importers.Where(i => i.IsActive))
            {
                if (importer.Type == ImporterType.TextImporter && importer.ImportModel == "TaxonomicUnit")
                {
                    var processor   = _importTransformOrchestrator.GetPlantInfoProcessor;
                    var transformer = new USDATransformer();
                    var startRow    = 1;
                    var batchSize   = 100;

                    await processor.InitializeOrigin(transformer.Origin);

                    await processor.InitializeLifeforms();

                    await processor.InitializeTaxons();

                    var dataFile     = FileHelpers.GetDatafileName(importer.Filename, dataDirectory);
                    var textImporter = new TextImporter <Checklist>(dataFile, importer.HasHeaders);
                    var row          = 1;
                    var checklists   = new List <Checklist>(batchSize);

                    await foreach (var result in textImporter.Import())
                    {
                        row++;
                        if (row < startRow)
                        {
                            continue;
                        }

                        // Always keep the current row; previously the row that triggered a flush was dropped.
                        checklists.Add(result);

                        if (checklists.Count >= batchSize)
                        {
                            await ProcessChecklists(transformer, processor, checklists);

                            checklists.Clear();
                        }
                    }

                    // Flush the final partial batch once, after the stream ends (not once per row).
                    if (checklists.Count > 0)
                    {
                        await ProcessChecklists(transformer, processor, checklists);

                        checklists.Clear();
                    }
                }
                else if (importer.Type == ImporterType.SqlImporter && importer.ImportModel == "TaxonomicUnit")
                {
                    var processor      = _importTransformOrchestrator.GetPlantInfoProcessor;
                    var sqlImporter    = new SqlImporter <TaxonomicUnit>(importer.ConnectionString, importer.SqlQuery);
                    var transformer    = new ITISPlantInfoTransformer();
                    var startRow       = 0;
                    var batchSize      = 100;
                    var row            = 0;
                    var taxonomicUnits = new List <TaxonomicUnit>(batchSize);

                    await processor.InitializeOrigin(transformer.Origin);

                    await processor.InitializeLifeforms();

                    await processor.InitializeTaxons();

                    await foreach (var result in sqlImporter.Import())
                    {
                        row++;
                        if (row < startRow)
                        {
                            continue;
                        }

                        // Always keep the current row; previously the row that triggered a flush was dropped.
                        taxonomicUnits.Add(result);

                        if (taxonomicUnits.Count >= batchSize)
                        {
                            await ProcessTaxonomicUnits(transformer, processor, taxonomicUnits);

                            taxonomicUnits.Clear();
                        }
                    }

                    // Flush whatever is left after the last full batch.
                    if (taxonomicUnits.Count > 0)
                    {
                        await ProcessTaxonomicUnits(transformer, processor, taxonomicUnits);

                        taxonomicUnits.Clear();
                    }
                }
                else if (importer.Type == ImporterType.SqlImporter && importer.ImportModel == "Vernacular")
                {
                    var processor   = _importTransformOrchestrator.GetSynonymProcessor;
                    var sqlImporter = new SqlImporter <Vernacular>(importer.ConnectionString, importer.SqlQuery);
                    var transformer = new ITISSynonymTransformer();
                    var startRow    = 0;
                    var batchSize   = 500;
                    var row         = 0;
                    var vernaculars = new List <Vernacular>(batchSize);

                    await processor.InitializeOrigin(transformer.Origin);

                    await processor.InitializeTaxons();

                    await foreach (var result in sqlImporter.Import())
                    {
                        row++;
                        if (row < startRow)
                        {
                            continue;
                        }

                        // Always keep the current row; previously the row that triggered a flush was dropped.
                        vernaculars.Add(result);

                        if (vernaculars.Count >= batchSize)
                        {
                            await ProcessSynonyms(transformer, processor, vernaculars);

                            vernaculars.Clear();
                        }
                    }

                    // Flush whatever is left after the last full batch.
                    if (vernaculars.Count > 0)
                    {
                        await ProcessSynonyms(transformer, processor, vernaculars);

                        vernaculars.Clear();
                    }
                }
                else if (importer.Type == ImporterType.EFImporter && importer.ImportModel == "PlantInfo")
                {
                    var processor  = _importTransformOrchestrator.GetElasticPlantInfoProcessor;
                    var pageSize   = 1000;
                    var finished   = false;
                    var counter    = 0;
                    var counterEnd = 40000;

                    // Page through until a page yields no successes or the hard upper bound is reached.
                    while (!finished && counter < counterEnd)
                    {
                        var response = await processor.ProcessSome(counter, pageSize);

                        Console.WriteLine($"Successes: {response.Successes} / Failures: {response.Failures}");

                        if (response.Failures > 0)
                        {
                            throw new InvalidOperationException("Errors! Noooooo!");
                        }
                        else if (response.Successes == 0)
                        {
                            finished = true;
                        }

                        counter += pageSize;
                    }
                }
                else if (importer.Type == ImporterType.EFImporter && importer.ImportModel == "Specimen")
                {
                    var processor  = _importTransformOrchestrator.GetElasticSpecimenProcessor;
                    var pageSize   = 1000;
                    var finished   = false;
                    var counter    = 0;
                    var counterEnd = 1000000;

                    // Page through until a page yields no successes or the hard upper bound is reached.
                    while (!finished && counter < counterEnd)
                    {
                        var response = await processor.Process(counter, counter + pageSize);

                        Console.WriteLine($"Successes: {response.Successes} / Failures: {response.Failures}");

                        if (response.Failures > 0)
                        {
                            throw new InvalidOperationException("Errors! Noooooo!");
                        }
                        else if (response.Successes == 0)
                        {
                            finished = true;
                        }

                        counter += pageSize;
                    }
                }
                else if (importer.Type == ImporterType.JsonImporter && importer.ImportModel == "PlantsFile")
                {
                    var transformer  = new NatureServeTransformer();
                    var dataFile     = FileHelpers.GetDatafileName(importer.Filename, dataDirectory);
                    var textImporter = new JsonImporter <IEnumerable <Plant> >(dataFile);
                    // NOTE(review): despite its name, ImportObjectAsync is not awaited here and its result is
                    // enumerated directly — confirm against JsonImporter that this is intended.
                    var plants       = textImporter.ImportObjectAsync();
                    var plantInfos   = new List <PlantInfo>();

                    foreach (var plant in plants)
                    {
                        plantInfos.Add(transformer.Transform(plant));
                    }

                    var processor = _importTransformOrchestrator.GetNatureServePlantInfoProcessor();
                    await processor.InitializeOrigin(transformer.Origin);

                    await processor.InitializeLifeforms();

                    await processor.InitializeTaxons();

                    await processor.InitializeOrigins();

                    // Keep the lookups loaded by the first processor so later processors can be seeded with them.
                    List <Taxon>    taxons    = processor.Taxons;
                    List <Lifeform> lifeforms = processor.Lifeforms;
                    List <Origin>   origins   = processor.Origins;

                    var batchSize  = 500;
                    var start      = 0;
                    var counterEnd = 40000;

                    plantInfos = plantInfos.OrderBy(p => p.Origin.ExternalId).ToList();

                    for (var counter = start; counter < counterEnd; counter += batchSize)
                    {
                        // Every batch after the first gets a fresh processor seeded with the cached lookups.
                        if (counter > start)
                        {
                            processor = _importTransformOrchestrator.GetNatureServePlantInfoProcessor(lifeforms, taxons, origins);
                            await processor.InitializeOrigin(transformer.Origin);
                        }

                        // Materialize once: the batch is checked, processed and read again for the log line.
                        var batch = plantInfos.Skip(counter).Take(batchSize).ToList();

                        if (batch.Count == 0)
                        {
                            break;
                        }

                        var result = await processor.Process(batch);

                        Console.WriteLine($"Processed: {result.Count()} plantInfos - ended on {batch[batch.Count - 1].ScientificName}");
                    }
                }
            }
        }