private void HarvestData(int source_id, Options opts) { // Obtain source details, augment with connection string for this database ISource source = _mon_repo.FetchSourceParameters(source_id); Credentials creds = _mon_repo.Credentials; source.db_conn = creds.GetConnectionString(source.database_name, opts.harvest_type_id); _logger_helper.LogStudyHeader(opts, "For source: " + source.id + ": " + source.database_name); if (!opts.org_update_only) { // Bulk of the harvesting process can be skipped if this run is just for updating // tables with context values. if (source.source_type == "test") { // Set up expected data for later processing. // This is data derived from manual inspection of files and requires // a very different method, using stored procedures in the test db _test_repo.EstablishExpectedData(); } else { // Otherwise... // construct the sd tables. (Some sources may be data objects only.) _logger_helper.LogHeader("Recreate database tables"); SchemaBuilder sdb = new SchemaBuilder(source, _logger); sdb.RecreateTables(); // Construct the harvest_event record. _logger_helper.LogHeader("Process data"); int harvest_id = _mon_repo.GetNextHarvestEventId(); HarvestEvent harvest = new HarvestEvent(harvest_id, source.id, opts.harvest_type_id); _logger.Information("Harvest event " + harvest_id.ToString() + " began"); // Harvest the data from the local XML files IStudyProcessor study_processor = null; IObjectProcessor object_processor = null; harvest.num_records_available = _mon_repo.FetchFullFileCount(source.id, source.source_type, opts.harvest_type_id); if (source.source_type == "study") { if (source.uses_who_harvest) { study_processor = new WHOProcessor(_mon_repo, _logger); } else { switch (source.id) { case 101900: { study_processor = new BioLinccProcessor(_mon_repo, _logger); break; } case 101901: { study_processor = new YodaProcessor(_mon_repo, _logger); break; } case 100120: { study_processor = new CTGProcessor(_mon_repo, _logger); break; } case 100123: { study_processor = new EUCTRProcessor(_mon_repo, _logger); break; } case 100126: { study_processor = new ISRCTNProcessor(_mon_repo, _logger); break; } } } StudyController c = new StudyController(_logger, _mon_repo, _storage_repo, source, study_processor); harvest.num_records_harvested = c.LoopThroughFiles(opts.harvest_type_id, harvest_id); } else { // source type is 'object' switch (source.id) { case 100135: { object_processor = new PubmedProcessor(_mon_repo, _logger); break; } } ObjectController c = new ObjectController(_logger, _mon_repo, _storage_repo, source, object_processor); harvest.num_records_harvested = c.LoopThroughFiles(opts.harvest_type_id, harvest_id); } harvest.time_ended = DateTime.Now; _mon_repo.StoreHarvestEvent(harvest); _logger.Information("Number of source XML files: " + harvest.num_records_available.ToString()); _logger.Information("Number of files harvested: " + harvest.num_records_harvested.ToString()); _logger.Information("Harvest event " + harvest_id.ToString() + " ended"); } } // The functions below have to be run even if the harvest is 'org_update_only', // and also for the expected data in the test database. // ------------------------------------------------------------------- // MAKE USE OF SEPARATE 'CONTEXT' PROJECT (Same Solution, not DLL) // ------------------------------------------------------------------- ContextDataManager.Source context_source = new ContextDataManager.Source(source.id, source.source_type, source.database_name, source.db_conn, source.has_study_tables, source.has_study_topics, source.has_study_contributors); ContextDataManager.Credentials context_creds = new ContextDataManager.Credentials(creds.Host, creds.Username, creds.Password); _logger_helper.LogHeader("Updating context data"); ContextMain context_main = new ContextMain(_logger); context_main.UpdateDataFromContext(context_creds, context_source); // ------------------------------------------------------------------- // MAKE USE OF SEPARATE 'HASH' PROJECT (Same Solution, not DLL) // ------------------------------------------------------------------- // Note the hashes can only be done after all the data is complete, including // the organisation and topic codes and names derived above HashDataLibrary.Source hash_source = new HashDataLibrary.Source(source.id, source.source_type, source.database_name, source.db_conn, source.has_study_tables, source.has_study_topics, source.has_study_features, source.has_study_contributors, source.has_study_references, source.has_study_relationships, source.has_study_links, source.has_study_ipd_available, source.has_object_datasets, source.has_object_dates, source.has_object_rights, source.has_object_relationships, source.has_object_pubmed_set); _logger_helper.LogHeader("Creating Record Hashes"); HashMain hash_main = new HashMain(_logger, hash_source); hash_main.HashData(); // If harvesting test data it needs to be transferred // to the sdcomp schema for safekeeping and further processing // If a normal harvest from a full source statistics should be produced. // If the harvest was of the manual 'expected' data do neither. if (source.source_type != "test") { if (opts.harvest_type_id == 3) { // transfer sd data to test composite data store for later comparison // otherwise it will be overwritten by the next harvest of sd data _test_repo.TransferTestSDData(source); } else { // summarise results by providing stats on the sd tables _logger_helper.LogTableStatistics(source, "sd"); } } }