예제 #1
0
        /// <summary>
        /// Runs a single import for one source: sets up the temporary foreign (mon)
        /// tables, builds and fills the import (diff) tables, transfers new, edited
        /// and deleted data, and finally either stores the import event (normal run)
        /// or copies the ad data to the composite test store (test run).
        /// </summary>
        /// <param name="source_id">Id used to fetch the source parameters from the monitor repository.</param>
        /// <param name="rebuild_ad_tables">
        /// When true the ad tables are rebuilt with <c>ADBuilder</c>. When false and
        /// <paramref name="using_test_data"/> is true, the ad data is retrieved through the test repository instead.
        /// </param>
        /// <param name="using_test_data">
        /// When true the run works on test data: sd data is retrieved first, the full-harvest check
        /// and the 'last imported' date updates are skipped, and no import event is stored.
        /// </param>
        private void ImportData(int source_id, bool rebuild_ad_tables, bool using_test_data)
        {
            // Obtain source details, augment with connection string for this database.

            ISource     source = _mon_repo.FetchSourceParameters(source_id);
            Credentials creds  = _mon_repo.Credentials;

            source.db_conn = creds.GetConnectionString(source.database_name, using_test_data);

            if (using_test_data)
            {
                // First need to copy sd data back from the composite
                // sd tables to the sd tables...

                _test_repo.RetrieveSDData(source);
            }

            _logger_helper.Logheader("Setup");
            _logger.Information("For source: " + source.id + ": " + source.database_name);

            // Establish top level builder classes and
            // set up sf monitor tables as foreign tables, temporarily.

            ImportBuilder   ib          = new ImportBuilder(source, _mon_repo, _logger);
            DataTransferrer transferrer = new DataTransferrer(source, _logger);

            transferrer.EstablishForeignMonTables(creds);
            _logger.Information("Foreign (mon) tables established in database");

            // Everything between establishing and dropping the foreign tables runs
            // inside try / finally, so that the temporary foreign tables are removed
            // even when a step of the import throws. The exception itself still
            // propagates to the caller.
            // NOTE(review): if DropForeignMonTables also throws, its exception
            // replaces the original one - confirm that is acceptable.

            ImportEvent import_event;
            try
            {
                // Recreate ad tables if necessary. Otherwise, when working with test
                // data, retrieve the ad data through the test repository
                // (presumably back from compad, on the second pass of a test loop).

                if (rebuild_ad_tables)
                {
                    ADBuilder adb = new ADBuilder(source, _mon_repo, _logger);
                    adb.BuildNewADTables();
                }
                else if (using_test_data)
                {
                    _test_repo.RetrieveADData(source);
                }

                // Create and fill temporary tables to hold ids and edit statuses
                // of new, edited, deleted studies and data objects.

                _logger_helper.Logheader("Start Import Process");
                _logger_helper.Logheader("Create and fill diff tables");
                ib.CreateImportTables();

                // The full-harvest check is only made for non-test imports;
                // for test data the flag stays false.

                bool count_deleted = false;
                if (!using_test_data)
                {
                    count_deleted = _mon_repo.CheckIfFullHarvest(source.id);
                }
                ib.FillImportTables(count_deleted);
                _mon_repo.LogDiffs(source);

                // Create import event log record and start
                // the data transfer proper...

                int import_id = _mon_repo.GetNextImportEventId();
                import_event  = ib.CreateImportEvent(import_id);

                // Consider new studies, record dates, edited studies and / or objects,
                // and any deleted studies / objects.

                _logger_helper.Logheader("Adding new data");
                if (source.has_study_tables)
                {
                    transferrer.AddNewStudies(import_id);
                }
                transferrer.AddNewDataObjects(import_id);

                _logger_helper.Logheader("Updating dates of data");
                transferrer.UpdateDatesOfData();

                _logger_helper.Logheader("Editing existing data where necessary");
                if (source.has_study_tables)
                {
                    transferrer.UpdateEditedStudyData(import_id);
                }
                transferrer.UpdateEditedDataObjectData(import_id);

                _logger_helper.Logheader("Deleting data no longer present in source");
                if (source.has_study_tables)
                {
                    transferrer.RemoveDeletedStudyData(import_id);
                }
                transferrer.RemoveDeletedDataObjectData(import_id);

                // Update the 'date imported' record in the mon.source data tables.
                // Affects all records with status 1, 2 or 3 (non-test imports only).

                if (!using_test_data)
                {
                    if (source.has_study_tables)
                    {
                        transferrer.UpdateStudiesLastImportedDate(import_id);
                    }
                    else
                    {
                        // Only do the objects table if there are no studies (e.g. PubMed).
                        transferrer.UpdateObjectsLastImportedDate(import_id);
                    }
                }

                // Ensure that the full hash records have been updated -
                // they may not have been if a change was only in attribute(s).

                _logger_helper.Logheader("Tidy up and finish");
                transferrer.UpdateFullStudyHashes();
            }
            finally
            {
                // Remove the foreign tables, whether or not the import succeeded.

                transferrer.DropForeignMonTables();
                _logger.Information("Foreign (mon) tables removed from database");
            }

            if (using_test_data)
            {
                // Copy ad data from ad tables to the compad tables...

                _test_repo.TransferADDataToComp(source);
            }
            else
            {
                // Only store import event for non-test imports.

                _mon_repo.StoreImportEvent(import_event);
            }
        }
예제 #2
0
        /// <summary>
        /// Runs a harvest for one source. Unless the run is 'org_update_only', it either
        /// establishes the expected test data (source type "test") or recreates the sd
        /// tables and processes the source files with the processor matching the source.
        /// In all cases it then updates the context data and creates the record hashes,
        /// and finally, for non-"test" sources, either transfers the sd data to the test
        /// store (harvest type 3) or logs table statistics.
        /// </summary>
        /// <param name="source_id">Id used to fetch the source parameters from the monitor repository.</param>
        /// <param name="opts">Run options; this method reads <c>harvest_type_id</c> and <c>org_update_only</c>.</param>
        /// <exception cref="InvalidOperationException">
        /// Thrown when no processor is defined for the source's id, instead of passing
        /// a null processor on to the controller.
        /// </exception>
        private void HarvestData(int source_id, Options opts)
        {
            // Obtain source details, augment with connection string for this database

            ISource     source = _mon_repo.FetchSourceParameters(source_id);
            Credentials creds  = _mon_repo.Credentials;

            source.db_conn = creds.GetConnectionString(source.database_name, opts.harvest_type_id);
            _logger_helper.LogStudyHeader(opts, "For source: " + source.id + ": " + source.database_name);

            if (!opts.org_update_only)
            {
                // Bulk of the harvesting process can be skipped if this run is just for updating
                // tables with context values.

                if (source.source_type == "test")
                {
                    // Set up expected data for later processing.
                    // This is data derived from manual inspection of files and requires
                    // a very different method, using stored procedures in the test db
                    _test_repo.EstablishExpectedData();
                }
                else
                {
                    // Otherwise...
                    // construct the sd tables. (Some sources may be data objects only.)

                    _logger_helper.LogHeader("Recreate database tables");
                    SchemaBuilder sdb = new SchemaBuilder(source, _logger);
                    sdb.RecreateTables();

                    // Construct the harvest_event record.

                    _logger_helper.LogHeader("Process data");
                    int          harvest_id = _mon_repo.GetNextHarvestEventId();
                    HarvestEvent harvest    = new HarvestEvent(harvest_id, source.id, opts.harvest_type_id);
                    _logger.Information("Harvest event " + harvest_id.ToString() + " began");

                    // Harvest the data from the local XML files
                    IStudyProcessor  study_processor  = null;
                    IObjectProcessor object_processor = null;
                    harvest.num_records_available = _mon_repo.FetchFullFileCount(source.id, source.source_type, opts.harvest_type_id);

                    if (source.source_type == "study")
                    {
                        if (source.uses_who_harvest)
                        {
                            study_processor = new WHOProcessor(_mon_repo, _logger);
                        }
                        else
                        {
                            // Sources with their own file format each have a dedicated processor.
                            switch (source.id)
                            {
                            case 101900:
                            {
                                study_processor = new BioLinccProcessor(_mon_repo, _logger);
                                break;
                            }

                            case 101901:
                            {
                                study_processor = new YodaProcessor(_mon_repo, _logger);
                                break;
                            }

                            case 100120:
                            {
                                study_processor = new CTGProcessor(_mon_repo, _logger);
                                break;
                            }

                            case 100123:
                            {
                                study_processor = new EUCTRProcessor(_mon_repo, _logger);
                                break;
                            }

                            case 100126:
                            {
                                study_processor = new ISRCTNProcessor(_mon_repo, _logger);
                                break;
                            }

                            default:
                            {
                                // Fail with a clear message rather than handing a null
                                // processor to the controller.
                                throw new InvalidOperationException("No study processor is defined for source id " + source.id);
                            }
                            }
                        }

                        StudyController c = new StudyController(_logger, _mon_repo, _storage_repo, source, study_processor);
                        harvest.num_records_harvested = c.LoopThroughFiles(opts.harvest_type_id, harvest_id);
                    }
                    else
                    {
                        // source type is 'object'
                        switch (source.id)
                        {
                        case 100135:
                        {
                            object_processor = new PubmedProcessor(_mon_repo, _logger);
                            break;
                        }

                        default:
                        {
                            // Fail with a clear message rather than handing a null
                            // processor to the controller.
                            throw new InvalidOperationException("No object processor is defined for source id " + source.id);
                        }
                        }

                        ObjectController c = new ObjectController(_logger, _mon_repo, _storage_repo, source, object_processor);
                        harvest.num_records_harvested = c.LoopThroughFiles(opts.harvest_type_id, harvest_id);
                    }

                    // Complete and store the harvest event, then report the counts.

                    harvest.time_ended = DateTime.Now;
                    _mon_repo.StoreHarvestEvent(harvest);

                    _logger.Information("Number of source XML files: " + harvest.num_records_available.ToString());
                    _logger.Information("Number of files harvested: " + harvest.num_records_harvested.ToString());
                    _logger.Information("Harvest event " + harvest_id.ToString() + " ended");
                }
            }

            // The functions below have to be run even if the harvest is 'org_update_only',
            // and also for the expected data in the test database.

            // -------------------------------------------------------------------
            // MAKE USE OF SEPARATE 'CONTEXT' PROJECT (Same Solution, not DLL)
            // -------------------------------------------------------------------

            ContextDataManager.Source context_source = new ContextDataManager.Source(source.id, source.source_type, source.database_name, source.db_conn,
                                                                                     source.has_study_tables, source.has_study_topics, source.has_study_contributors);
            ContextDataManager.Credentials context_creds = new ContextDataManager.Credentials(creds.Host, creds.Username, creds.Password);

            _logger_helper.LogHeader("Updating context data");
            ContextMain context_main = new ContextMain(_logger);

            context_main.UpdateDataFromContext(context_creds, context_source);

            // -------------------------------------------------------------------
            // MAKE USE OF SEPARATE 'HASH' PROJECT (Same Solution, not DLL)
            // -------------------------------------------------------------------

            // Note the hashes can only be done after all the data is complete, including
            // the organisation and topic codes and names derived above

            HashDataLibrary.Source hash_source = new HashDataLibrary.Source(source.id, source.source_type, source.database_name, source.db_conn,
                                                                            source.has_study_tables, source.has_study_topics, source.has_study_features,
                                                                            source.has_study_contributors, source.has_study_references, source.has_study_relationships,
                                                                            source.has_study_links, source.has_study_ipd_available, source.has_object_datasets,
                                                                            source.has_object_dates, source.has_object_rights, source.has_object_relationships,
                                                                            source.has_object_pubmed_set);

            _logger_helper.LogHeader("Creating Record Hashes");
            HashMain hash_main = new HashMain(_logger, hash_source);

            hash_main.HashData();

            // If harvesting test data, it needs to be transferred
            // to the sdcomp schema for safekeeping and further processing.
            // If a normal harvest from a full source, statistics should be produced.
            // If the harvest was of the manual 'expected' data, do neither.

            if (source.source_type != "test")
            {
                // NOTE(review): harvest type 3 appears to denote a test-data harvest - confirm.
                if (opts.harvest_type_id == 3)
                {
                    // transfer sd data to test composite data store for later comparison
                    // otherwise it will be overwritten by the next harvest of sd data
                    _test_repo.TransferTestSDData(source);
                }
                else
                {
                    // summarise results by providing stats on the sd tables
                    _logger_helper.LogTableStatistics(source, "sd");
                }
            }
        }