コード例 #1
0
ファイル: Runner.cs プロジェクト: carlingkirk/emergence
        /// <summary>
        /// Transforms a batch of vernacular records into synonyms, hands the successfully
        /// transformed ones to the synonym processor and logs one line per processed synonym.
        /// </summary>
        /// <param name="transformer">Transformer that converts each vernacular record into a synonym.</param>
        /// <param name="processor">Processor that receives the transformed synonyms.</param>
        /// <param name="vernaculars">The batch to process; the method does nothing when it is empty.</param>
        /// <returns>A task that completes once the batch has been processed and logged.</returns>
        private async Task ProcessSynonyms(ITISSynonymTransformer transformer, SynonymProcessor processor, List<Vernacular> vernaculars)
        {
            if (!vernaculars.Any())
            {
                return;
            }

            var synonyms = new List<Synonym>();
            foreach (var vernacular in vernaculars)
            {
                try
                {
                    synonyms.Add(transformer.Transform(vernacular));
                }
                catch (Exception ex)
                {
                    // A record that cannot be transformed is logged and skipped so that the
                    // rest of the batch is still processed. The exception is passed as the
                    // first argument so the logger records it as an exception (with stack
                    // trace) instead of treating it as an unused format argument, and the
                    // values go through a message template so braces in the data cannot
                    // corrupt the format string.
                    _logger.LogError(ex, "Unable to process {Taxon} {Name} {Rank} {Message}",
                                     vernacular.Taxon, vernacular.Name, vernacular.Rank, ex.Message);
                }
            }

            var synonymsResult = await processor.Process(synonyms);

            foreach (var synonymResult in synonymsResult)
            {
                _logger.LogInformation("TaxonId: {TaxonId} Synonym: {Synonym} Rank: {Rank}",
                                       synonymResult.Taxon.TaxonId, synonymResult.Name, synonymResult.Rank);
            }
        }
コード例 #2
0
ファイル: ITISTests.cs プロジェクト: carlingkirk/emergence
        // Transforms every record of the ITIS synonym fixture with ITISSynonymTransformer and
        // checks the counts of the resulting synonyms by kingdom, language, update year, rank,
        // variety and origin.
        public void TestITISSynonymTransformer()
        {
            var synonymTransformer = new ITISSynonymTransformer();
            var synonyms = new List<Synonym>();

            foreach (var vernacular in ITISSynonymData())
            {
                synonyms.Add(synonymTransformer.Transform(vernacular));
            }

            // One synonym is expected per fixture record.
            synonyms.Count.Should().Be(16);

            synonyms.Count(synonym => synonym.Taxon.Kingdom == "Plantae").Should().Be(1);
            synonyms.Count(synonym => synonym.Language == "English").Should().Be(10);
            synonyms.Count(synonym => synonym.DateUpdated.Year == 2003).Should().Be(3);

            // Variety-level expectations.
            synonyms.Count(synonym => synonym.Rank == "Variety").Should().Be(2);
            synonyms.Count(synonym => synonym.Taxon.Variety != null).Should().Be(2);
            synonyms.Count(synonym => synonym.Taxon.Variety == "paludicola").Should().Be(1);

            synonyms.Select(synonym => synonym.Origin).Count().Should().Be(16);
        }
コード例 #3
0
ファイル: ITISTests.cs プロジェクト: carlingkirk/emergence
        // Transforms the ITIS synonym fixture, runs the result through a SynonymProcessor wired
        // to mock repositories, and checks the counts of the processed synonyms.
        public async Task TestITISSynonymProcessor()
        {
            var synonymTransformer = new ITISSynonymTransformer();
            var transformed = new List<Synonym>();

            foreach (var vernacular in ITISSynonymData())
            {
                transformed.Add(synonymTransformer.Transform(vernacular));
            }

            // Each mock repository is created from an empty list.
            var originRepo = RepositoryMocks.GetStandardMockOriginRepository(new List<Stores.Origin>());
            var locationRepo = RepositoryMocks.GetStandardMockLocationRepository(new List<Stores.Location>());
            var synonymRepo = RepositoryMocks.GetStandardMockSynonymRepository(new List<Stores.Synonym>());
            var taxonRepo = RepositoryMocks.GetStandardMockTaxonRepository(new List<Stores.Taxon>());

            // Services are layered on top of the mocks in dependency order.
            var locations = new LocationService(locationRepo.Object);
            var origins = new OriginService(originRepo.Object, locations);
            var synonymSvc = new SynonymService(synonymRepo.Object);
            var taxons = new TaxonService(taxonRepo.Object, synonymSvc);

            var synonymProcessor = new SynonymProcessor(synonymSvc, origins, taxons);

            // The processor is initialized before any synonyms are handed to it.
            await synonymProcessor.InitializeOrigin(synonymTransformer.Origin);
            await synonymProcessor.InitializeTaxons();

            var processed = await synonymProcessor.Process(transformed);

            processed.Count().Should().Be(16);
            processed.Count(synonym => synonym.Taxon.Kingdom == "Plantae").Should().Be(1);
            processed.Count(synonym => synonym.Language == "English").Should().Be(10);
            processed.Count(synonym => synonym.DateUpdated.Year == 2003).Should().Be(3);
            processed.Count(synonym => synonym.Rank == "Variety").Should().Be(2);
            processed.Count(synonym => synonym.Taxon.Variety != null).Should().Be(2);
            processed.Count(synonym => synonym.Taxon.Variety == "paludicola").Should().Be(1);
            processed.Select(synonym => synonym.Origin).Count().Should().Be(16);
        }
コード例 #4
0
ファイル: Runner.cs プロジェクト: carlingkirk/emergence
        /// <summary>
        /// Runs every active importer returned by <c>LoadImporters</c>, dispatching on the
        /// importer's type and import model:
        /// text and SQL importers are read row by row and processed in batches, the EF importers
        /// drive the Elastic processors in windows of 1000, and the JSON importer transforms a
        /// plants file and processes the result in batches.
        /// </summary>
        /// <param name="args">
        /// Command-line arguments. When at least one is supplied, the first is used as the data
        /// directory; otherwise the "dataDirectory" configuration value is used.
        /// </param>
        /// <returns>A task that completes when all active importers have run.</returns>
        /// <exception cref="Exception">Thrown when an Elastic processor reports one or more failures.</exception>
        public async Task Run(string[] args)
        {
            var importers = LoadImporters(Configuration);

            // A data directory given on the command line takes precedence over configuration.
            // A null argument array is treated the same as an empty one.
            string dataDirectory = args != null && args.Length != 0
                ? args[0]
                : Configuration["dataDirectory"];

            foreach (var importer in importers.Where(i => i.IsActive))
            {
                if (importer.Type == ImporterType.TextImporter && importer.ImportModel == "TaxonomicUnit")
                {
                    var processor   = _importTransformOrchestrator.GetPlantInfoProcessor;
                    var transformer = new USDATransformer();
                    var startRow    = 1;
                    var batchSize   = 100;

                    await processor.InitializeOrigin(transformer.Origin);

                    await processor.InitializeLifeforms();

                    await processor.InitializeTaxons();

                    var dataFile     = FileHelpers.GetDatafileName(importer.Filename, dataDirectory);
                    var textImporter = new TextImporter<Checklist>(dataFile, importer.HasHeaders);
                    var row          = 1;
                    var checklists   = new List<Checklist>();

                    await foreach (var result in textImporter.Import())
                    {
                        row++;
                        if (row < startRow)
                        {
                            continue;
                        }

                        // Always keep the current row; previously the row that completed a
                        // batch was dropped instead of being added.
                        checklists.Add(result);

                        if (row % batchSize == 0)
                        {
                            await ProcessChecklists(transformer, processor, checklists);

                            checklists.Clear();
                        }
                    }

                    // Flush the final partial batch once, after the loop. Doing this inside the
                    // loop would process every row as its own batch.
                    if (checklists.Any())
                    {
                        await ProcessChecklists(transformer, processor, checklists);

                        checklists.Clear();
                    }
                }
                else if (importer.Type == ImporterType.SqlImporter && importer.ImportModel == "TaxonomicUnit")
                {
                    var processor      = _importTransformOrchestrator.GetPlantInfoProcessor;
                    var sqlImporter    = new SqlImporter<TaxonomicUnit>(importer.ConnectionString, importer.SqlQuery);
                    var transformer    = new ITISPlantInfoTransformer();
                    var startRow       = 0;
                    var batchSize      = 100;
                    var row            = 0;
                    var taxonomicUnits = new List<TaxonomicUnit>();

                    await processor.InitializeOrigin(transformer.Origin);

                    await processor.InitializeLifeforms();

                    await processor.InitializeTaxons();

                    await foreach (var result in sqlImporter.Import())
                    {
                        row++;
                        if (row < startRow)
                        {
                            continue;
                        }

                        // The row that completes a batch belongs to that batch.
                        taxonomicUnits.Add(result);

                        if (row % batchSize == 0)
                        {
                            await ProcessTaxonomicUnits(transformer, processor, taxonomicUnits);

                            taxonomicUnits.Clear();
                        }
                    }

                    // Flush whatever is left over after the last full batch.
                    if (taxonomicUnits.Any())
                    {
                        await ProcessTaxonomicUnits(transformer, processor, taxonomicUnits);

                        taxonomicUnits.Clear();
                    }
                }
                else if (importer.Type == ImporterType.SqlImporter && importer.ImportModel == "Vernacular")
                {
                    var processor   = _importTransformOrchestrator.GetSynonymProcessor;
                    var sqlImporter = new SqlImporter<Vernacular>(importer.ConnectionString, importer.SqlQuery);
                    var transformer = new ITISSynonymTransformer();
                    var startRow    = 0;
                    var batchSize   = 500;
                    var row         = 0;
                    var vernaculars = new List<Vernacular>();

                    await processor.InitializeOrigin(transformer.Origin);

                    await processor.InitializeTaxons();

                    await foreach (var result in sqlImporter.Import())
                    {
                        row++;
                        if (row < startRow)
                        {
                            continue;
                        }

                        // The row that completes a batch belongs to that batch.
                        vernaculars.Add(result);

                        if (row % batchSize == 0)
                        {
                            await ProcessSynonyms(transformer, processor, vernaculars);

                            vernaculars.Clear();
                        }
                    }

                    // Flush whatever is left over after the last full batch.
                    if (vernaculars.Any())
                    {
                        await ProcessSynonyms(transformer, processor, vernaculars);

                        vernaculars.Clear();
                    }
                }
                else if (importer.Type == ImporterType.EFImporter && importer.ImportModel == "PlantInfo")
                {
                    var processor  = _importTransformOrchestrator.GetElasticPlantInfoProcessor;
                    var finished   = false;
                    var counter    = 0;
                    var counterEnd = 40000;

                    // Stops at the first window with no successes, or once counterEnd is reached.
                    while (!finished && counter < counterEnd)
                    {
                        // NOTE(review): this branch calls ProcessSome(counter, 1000) while the
                        // Specimen branch calls Process(counter, counter + 1000) - confirm the
                        // difference is intended.
                        var response = await processor.ProcessSome(counter, 1000);

                        Console.WriteLine($"Successes: {response.Successes} / Failures: {response.Failures}");

                        if (response.Failures > 0)
                        {
                            throw new Exception("Errors! Noooooo!");
                        }
                        else if (response.Successes == 0)
                        {
                            finished = true;
                        }

                        counter += 1000;
                    }
                }
                else if (importer.Type == ImporterType.EFImporter && importer.ImportModel == "Specimen")
                {
                    var processor  = _importTransformOrchestrator.GetElasticSpecimenProcessor;
                    var finished   = false;
                    var counter    = 0;
                    var counterEnd = 1000000;

                    // Stops at the first window with no successes, or once counterEnd is reached.
                    while (!finished && counter < counterEnd)
                    {
                        var response = await processor.Process(counter, counter + 1000);

                        Console.WriteLine($"Successes: {response.Successes} / Failures: {response.Failures}");

                        if (response.Failures > 0)
                        {
                            throw new Exception("Errors! Noooooo!");
                        }
                        else if (response.Successes == 0)
                        {
                            finished = true;
                        }

                        counter += 1000;
                    }
                }
                else if (importer.Type == ImporterType.JsonImporter && importer.ImportModel == "PlantsFile")
                {
                    var transformer  = new NatureServeTransformer();
                    var dataFile     = FileHelpers.GetDatafileName(importer.Filename, dataDirectory);
                    var textImporter = new JsonImporter<IEnumerable<Plant>>(dataFile);
                    var plants       = textImporter.ImportObjectAsync();
                    var plantInfos   = new List<PlantInfo>();

                    foreach (var plant in plants)
                    {
                        plantInfos.Add(transformer.Transform(plant));
                    }

                    var processor = _importTransformOrchestrator.GetNatureServePlantInfoProcessor();
                    await processor.InitializeOrigin(transformer.Origin);

                    await processor.InitializeLifeforms();

                    await processor.InitializeTaxons();

                    await processor.InitializeOrigins();

                    // Keep the lookups loaded by the first processor so that the processors
                    // created for later batches can be seeded with them.
                    List<Taxon>    taxons    = processor.Taxons;
                    List<Lifeform> lifeforms = processor.Lifeforms;
                    List<Origin>   origins   = processor.Origins;

                    var batchSize  = 500;
                    var start      = 0;
                    var counter    = start;
                    var counterEnd = 40000;

                    plantInfos = plantInfos.OrderBy(p => p.Origin.ExternalId).ToList();

                    // Processing stops when the list is exhausted or counterEnd is reached.
                    while (counter < counterEnd)
                    {
                        if (counter > start)
                        {
                            // Every batch after the first gets a fresh processor seeded with
                            // the lookups captured above.
                            processor = _importTransformOrchestrator.GetNatureServePlantInfoProcessor(lifeforms, taxons, origins);
                            await processor.InitializeOrigin(transformer.Origin);
                        }

                        // Materialize the batch once; it is inspected several times below and
                        // a lazy Skip/Take query would re-scan plantInfos on each use.
                        IEnumerable<PlantInfo> batch = plantInfos.Skip(counter).Take(batchSize).ToList();

                        if (!batch.Any())
                        {
                            break;
                        }

                        var result = await processor.Process(batch);

                        Console.WriteLine($"Processed: {result.Count()} plantInfos - ended on {batch.Last().ScientificName}");
                        counter += batchSize;
                    }
                }
            }
        }