private void ImportData(int source_id, bool rebuild_ad_tables, bool using_test_data)
{
    // Obtain source details, augment with connection string for this database.

    ISource source = _mon_repo.FetchSourceParameters(source_id);
    Credentials creds = _mon_repo.Credentials;
    source.db_conn = creds.GetConnectionString(source.database_name, using_test_data);

    if (using_test_data)
    {
        // First need to copy sd data back from the composite
        // sd tables to the sd tables.

        _test_repo.RetrieveSDData(source);
    }

    _logger_helper.LogHeader("Setup");
    _logger.Information("For source: " + source.id + ": " + source.database_name);

    // Establish top level builder classes and
    // set up sf monitor tables as foreign tables, temporarily.

    ImportBuilder ib = new ImportBuilder(source, _mon_repo, _logger);
    DataTransferrer transferrer = new DataTransferrer(source, _logger);
    transferrer.EstablishForeignMonTables(creds);
    _logger.Information("Foreign (mon) tables established in database");

    // Recreate ad tables if necessary. If this is the second pass of a
    // test loop, the ad data will need to be retrieved back from compad.

    if (rebuild_ad_tables)
    {
        ADBuilder adb = new ADBuilder(source, _mon_repo, _logger);
        adb.BuildNewADTables();
    }
    else
    {
        if (using_test_data)
        {
            _test_repo.RetrieveADData(source);
        }
    }

    // Create and fill temporary tables to hold ids and edit statuses
    // of new, edited and deleted studies and data objects.

    _logger_helper.LogHeader("Start Import Process");
    _logger_helper.LogHeader("Create and fill diff tables");
    ib.CreateImportTables();
    bool count_deleted = false;
    if (!using_test_data)
    {
        count_deleted = _mon_repo.CheckIfFullHarvest(source.id);
    }
    ib.FillImportTables(count_deleted);
    _mon_repo.LogDiffs(source);

    // Create import event log record and start
    // the data transfer proper...

    int import_id = _mon_repo.GetNextImportEventId();
    ImportEvent import = ib.CreateImportEvent(import_id);

    // Consider new studies, record dates, edited studies and / or objects,
    // and any deleted studies / objects.

    _logger_helper.LogHeader("Adding new data");
    if (source.has_study_tables)
    {
        transferrer.AddNewStudies(import_id);
    }
    transferrer.AddNewDataObjects(import_id);

    _logger_helper.LogHeader("Updating dates of data");
    transferrer.UpdateDatesOfData();

    _logger_helper.LogHeader("Editing existing data where necessary");
    if (source.has_study_tables)
    {
        transferrer.UpdateEditedStudyData(import_id);
    }
    transferrer.UpdateEditedDataObjectData(import_id);

    _logger_helper.LogHeader("Deleting data no longer present in source");
    if (source.has_study_tables)
    {
        transferrer.RemoveDeletedStudyData(import_id);
    }
    transferrer.RemoveDeletedDataObjectData(import_id);

    // Update the 'date imported' record in the mon.source data tables.
    // Affects all records with status 1, 2 or 3 (non-test imports only).

    if (!using_test_data)
    {
        if (source.has_study_tables)
        {
            transferrer.UpdateStudiesLastImportedDate(import_id);
        }
        else
        {
            // Only do the objects table if there are no studies (e.g. PubMed).
            transferrer.UpdateObjectsLastImportedDate(import_id);
        }
    }

    // Ensure that the full hash records have been updated
    // (may not have been if the change was only in attribute(s)).
    // Remove foreign tables.

    _logger_helper.LogHeader("Tidy up and finish");
    transferrer.UpdateFullStudyHashes();
    transferrer.DropForeignMonTables();
    _logger.Information("Foreign (mon) tables removed from database");

    if (using_test_data)
    {
        // Copy ad data from the ad tables to the compad tables.
        _test_repo.TransferADDataToComp(source);
    }
    else
    {
        // Only store import event for non-test imports.
        _mon_repo.StoreImportEvent(import);
    }
}
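// A minimal sketch (not code from the original class) of the two-pass test
// loop implied by the using_test_data branches in ImportData above: pass 1
// rebuilds the ad tables and imports the test sd data; pass 2 restores the
// ad data from compad so edits and deletions are diffed against the earlier
// state. The 'RunImportTestLoop' name and the source id array are hypothetical.

private void RunImportTestLoop(int[] test_source_ids)
{
    foreach (int source_id in test_source_ids)
    {
        // Pass 1: build fresh ad tables and import the test sd data
        // (the sd data is first restored from the composite sd tables).
        ImportData(source_id, rebuild_ad_tables: true, using_test_data: true);

        // Pass 2: keep the ad tables but restore their data from compad,
        // exercising the new / edited / deleted diff logic.
        ImportData(source_id, rebuild_ad_tables: false, using_test_data: true);
    }
}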
private void HarvestData(int source_id, Options opts)
{
    // Obtain source details, augment with connection string for this database.

    ISource source = _mon_repo.FetchSourceParameters(source_id);
    Credentials creds = _mon_repo.Credentials;
    source.db_conn = creds.GetConnectionString(source.database_name, opts.harvest_type_id);
    _logger_helper.LogStudyHeader(opts, "For source: " + source.id + ": " + source.database_name);

    if (!opts.org_update_only)
    {
        // The bulk of the harvesting process can be skipped if this run
        // is just for updating tables with context values.

        if (source.source_type == "test")
        {
            // Set up expected data for later processing.
            // This is data derived from manual inspection of files and requires
            // a very different method, using stored procedures in the test db.

            _test_repo.EstablishExpectedData();
        }
        else
        {
            // Otherwise construct the sd tables.
            // (Some sources may be data objects only.)

            _logger_helper.LogHeader("Recreate database tables");
            SchemaBuilder sdb = new SchemaBuilder(source, _logger);
            sdb.RecreateTables();

            // Construct the harvest_event record.

            _logger_helper.LogHeader("Process data");
            int harvest_id = _mon_repo.GetNextHarvestEventId();
            HarvestEvent harvest = new HarvestEvent(harvest_id, source.id, opts.harvest_type_id);
            _logger.Information("Harvest event " + harvest_id.ToString() + " began");

            // Harvest the data from the local XML files.

            IStudyProcessor study_processor = null;
            IObjectProcessor object_processor = null;
            harvest.num_records_available = _mon_repo.FetchFullFileCount(source.id,
                                                source.source_type, opts.harvest_type_id);
            if (source.source_type == "study")
            {
                if (source.uses_who_harvest)
                {
                    study_processor = new WHOProcessor(_mon_repo, _logger);
                }
                else
                {
                    switch (source.id)
                    {
                        case 101900: study_processor = new BioLinccProcessor(_mon_repo, _logger); break;
                        case 101901: study_processor = new YodaProcessor(_mon_repo, _logger); break;
                        case 100120: study_processor = new CTGProcessor(_mon_repo, _logger); break;
                        case 100123: study_processor = new EUCTRProcessor(_mon_repo, _logger); break;
                        case 100126: study_processor = new ISRCTNProcessor(_mon_repo, _logger); break;
                    }
                }

                StudyController c = new StudyController(_logger, _mon_repo, _storage_repo, source, study_processor);
                harvest.num_records_harvested = c.LoopThroughFiles(opts.harvest_type_id, harvest_id);
            }
            else
            {
                // Source type is 'object'.

                switch (source.id)
                {
                    case 100135: object_processor = new PubmedProcessor(_mon_repo, _logger); break;
                }

                ObjectController c = new ObjectController(_logger, _mon_repo, _storage_repo, source, object_processor);
                harvest.num_records_harvested = c.LoopThroughFiles(opts.harvest_type_id, harvest_id);
            }

            harvest.time_ended = DateTime.Now;
            _mon_repo.StoreHarvestEvent(harvest);

            _logger.Information("Number of source XML files: " + harvest.num_records_available.ToString());
            _logger.Information("Number of files harvested: " + harvest.num_records_harvested.ToString());
            _logger.Information("Harvest event " + harvest_id.ToString() + " ended");
        }
    }

    // The functions below have to be run even if the harvest is 'org_update_only',
    // and also for the expected data in the test database.
    // -------------------------------------------------------------------
    // MAKE USE OF SEPARATE 'CONTEXT' PROJECT (Same Solution, not DLL)
    // -------------------------------------------------------------------

    ContextDataManager.Source context_source = new ContextDataManager.Source(source.id,
                        source.source_type, source.database_name, source.db_conn,
                        source.has_study_tables, source.has_study_topics,
                        source.has_study_contributors);
    ContextDataManager.Credentials context_creds = new ContextDataManager.Credentials(creds.Host,
                        creds.Username, creds.Password);

    _logger_helper.LogHeader("Updating context data");
    ContextMain context_main = new ContextMain(_logger);
    context_main.UpdateDataFromContext(context_creds, context_source);

    // -------------------------------------------------------------------
    // MAKE USE OF SEPARATE 'HASH' PROJECT (Same Solution, not DLL)
    // -------------------------------------------------------------------
    // Note the hashes can only be done after all the data is complete, including
    // the organisation and topic codes and names derived above.

    HashDataLibrary.Source hash_source = new HashDataLibrary.Source(source.id,
                        source.source_type, source.database_name, source.db_conn,
                        source.has_study_tables, source.has_study_topics,
                        source.has_study_features, source.has_study_contributors,
                        source.has_study_references, source.has_study_relationships,
                        source.has_study_links, source.has_study_ipd_available,
                        source.has_object_datasets, source.has_object_dates,
                        source.has_object_rights, source.has_object_relationships,
                        source.has_object_pubmed_set);

    _logger_helper.LogHeader("Creating Record Hashes");
    HashMain hash_main = new HashMain(_logger, hash_source);
    hash_main.HashData();

    // If harvesting test data it needs to be transferred to the sdcomp
    // schema for safekeeping and further processing.
    // If a normal harvest from a full source, statistics should be produced.
    // If the harvest was of the manual 'expected' data, do neither.

    if (source.source_type != "test")
    {
        if (opts.harvest_type_id == 3)
        {
            // Transfer sd data to the test composite data store for later
            // comparison, otherwise it will be overwritten by the next
            // harvest of sd data.

            _test_repo.TransferTestSDData(source);
        }
        else
        {
            // Summarise results by providing stats on the sd tables.

            _logger_helper.LogTableStatistics(source, "sd");
        }
    }
}
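// An illustrative driver (hypothetical, not part of the original class) showing
// how HarvestData and ImportData might be sequenced for a single source. The
// harvest_type_id == 3 check mirrors its use as the test-data indicator in
// HarvestData above; rebuilding the ad tables on every run is an assumption.

private void ProcessSource(int source_id, Options opts)
{
    // Harvest first: recreate and fill the sd tables from the local XML
    // files, then add context data and compute record hashes.
    HarvestData(source_id, opts);

    // Then import the sd data into the ad tables, applying the
    // new / edited / deleted diff logic.
    bool using_test_data = opts.harvest_type_id == 3;
    ImportData(source_id, rebuild_ad_tables: true, using_test_data: using_test_data);
}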