/// <summary>
        /// Records batch progress for <paramref name="extractDatasetCommand"/> when it has an
        /// <c>ExtractionProgressIfAny</c> and a <c>BatchEnd</c> value, then reports whether the
        /// extraction should be run again for the next batch.
        /// </summary>
        /// <param name="extractDatasetCommand">The command whose batch state is inspected (and reset when more batches remain)</param>
        /// <param name="listener">Receives an information message when progress is saved</param>
        /// <returns>True when progress was saved and <c>MoreToFetch()</c> reports more data (i.e. run again); otherwise false</returns>
        private bool IncrementProgressIfAny(ExtractDatasetCommand extractDatasetCommand, IDataLoadEventListener listener)
        {
            var extractionProgress = extractDatasetCommand.SelectedDataSets.ExtractionProgressIfAny;

            // nothing to record unless this is a batch extraction that produced a batch end date
            if (extractionProgress == null || extractDatasetCommand.BatchEnd == null)
            {
                return false;
            }

            // persist how far the extraction has got
            extractionProgress.ProgressDate = extractDatasetCommand.BatchEnd.Value;
            extractionProgress.SaveToDatabase();

            var savedMessage = new NotifyEventArgs(ProgressEventType.Information, $"Saving batch extraction progress as {extractionProgress.ProgressDate}");
            listener.OnNotify(this, savedMessage);

            if (!extractionProgress.MoreToFetch())
            {
                return false;
            }

            // clear the query builder so it can be rebuilt for the new dates
            extractDatasetCommand.Reset();
            return true;
        }
        /// <summary>
        /// Makes the custom dataset specific to the project, extracts it through a freshly set up pipeline and
        /// asserts on the first three lines of the "custTable99.csv" file found in the populated directory.
        /// The dataset is removed from the configuration again whether or not the assertions pass.
        /// </summary>
        public void Extract_ProjectSpecificCatalogue_WholeDataset()
        {
            // tie the custom catalogue to this project
            CustomExtractableDataSet.Project_ID = _project.ID;
            CustomExtractableDataSet.SaveToDatabase();

            var pipeline = SetupPipeline();
            pipeline.Name = "Extract_ProjectSpecificCatalogue_WholeDataset Pipe";
            pipeline.SaveToDatabase();

            _configuration.AddDatasetToConfiguration(CustomExtractableDataSet);

            try
            {
                var bundle = new ExtractableDatasetBundle(CustomExtractableDataSet);
                _request = new ExtractDatasetCommand(_configuration, bundle);

                ExtractionPipelineUseCase pipelineUseCase;
                IExecuteDatasetExtractionDestination destination;
                Execute(out pipelineUseCase, out destination);

                // exactly one file with this name is expected in the output directory
                var extractedFile = destination.DirectoryPopulated
                                               .GetFiles()
                                               .Single(f => f.Name.Equals("custTable99.csv"));
                Assert.IsNotNull(extractedFile);

                var fileLines = File.ReadAllLines(extractedFile.FullName);

                Assert.AreEqual("SuperSecretThing,ReleaseID", fileLines[0]);
                Assert.AreEqual("monkeys can all secretly fly,Pub_54321", fileLines[1]);
                Assert.AreEqual("the wizard of OZ was a man behind a machine,Pub_11ftw", fileLines[2]);
            }
            finally
            {
                _configuration.RemoveDatasetFromConfiguration(CustomExtractableDataSet);
            }
        }
Exemple #3
0
        /// <summary>
        /// Restricts <paramref name="queryBuilder"/> to a single date window when <paramref name="request"/> has an
        /// <see cref="ExtractionProgress"/>.  Does nothing when there is none.
        ///
        /// <para>The window starts at the progress date (or the start date when no progress has been made) and runs for
        /// <see cref="ExtractionProgress.NumberOfDaysPerBatch"/> days, capped at the end date (or the current time when
        /// there is no end date).  The window is written to <c>request.BatchStart</c> / <c>request.BatchEnd</c> and added
        /// to the query as a WHERE line using the <c>@batchStart</c> (inclusive) and <c>@batchEnd</c> (exclusive)
        /// parameters.  The first batch additionally matches rows where the date expression is null.</para>
        /// </summary>
        /// <param name="request">The extraction command being built; its batch properties are updated</param>
        /// <param name="queryBuilder">The builder that receives the WHERE line and the two global parameters</param>
        /// <param name="syntaxHelper">Used to produce the parameter declarations</param>
        /// <exception cref="QueryBuildingException">Neither a start date nor a progress date is set, or the batch size is not positive</exception>
        private void HandleBatching(ExtractDatasetCommand request, QueryBuilder queryBuilder, IQuerySyntaxHelper syntaxHelper)
        {
            var batch = request.SelectedDataSets.ExtractionProgressIfAny;

            if (batch == null)
            {
                // there is no batching going on
                return;
            }

            // this is a batch resume if we have made some progress already
            request.IsBatchResume = batch.ProgressDate.HasValue;

            DateTime start = batch.ProgressDate ?? batch.StartDate ?? throw new QueryBuildingException($"It was not possible to build a batch extraction query for '{request}' because there is no {nameof(ExtractionProgress.StartDate)} or {nameof(ExtractionProgress.ProgressDate)} set on the {nameof(ExtractionProgress)}");

            if (batch.NumberOfDaysPerBatch <= 0)
            {
                throw new QueryBuildingException($"{nameof(ExtractionProgress.NumberOfDaysPerBatch)} was {batch.NumberOfDaysPerBatch} for '{request}'");
            }

            // the date expression the window is applied to
            string dateSql = batch.ExtractionInformation.SelectSQL;

            // Don't load into the future / past end of dataset.  The cap is read once so that the value
            // compared against is the same value that gets assigned (DateTime.Now moves between reads).
            DateTime latestAllowedEnd = batch.EndDate ?? DateTime.Now;
            DateTime end = start.AddDays(batch.NumberOfDaysPerBatch);

            if (end > latestAllowedEnd)
            {
                end = latestAllowedEnd;
            }

            request.BatchStart = start;
            request.BatchEnd   = end;

            string window = $"({dateSql} >= @batchStart AND {dateSql} < @batchEnd)";

            // if it is a first batch, also pull the null dates; subsequent batches use the window alone
            string line = request.IsBatchResume
                ? window
                : $"({window} OR {dateSql} is null)";

            queryBuilder.AddCustomLine(line, QueryComponent.WHERE);

            var batchStartDeclaration = syntaxHelper.GetParameterDeclaration("@batchStart", new DatabaseTypeRequest(typeof(DateTime)));
            var batchStartParameter   = new ConstantParameter(batchStartDeclaration, FormatDateAsParameterValue(start), null, syntaxHelper);

            queryBuilder.ParameterManager.AddGlobalParameter(batchStartParameter);

            var batchEndDeclaration = syntaxHelper.GetParameterDeclaration("@batchEnd", new DatabaseTypeRequest(typeof(DateTime)));
            var batchEndParameter   = new ConstantParameter(batchEndDeclaration, FormatDateAsParameterValue(end), null, syntaxHelper);

            queryBuilder.ParameterManager.AddGlobalParameter(batchEndParameter);
        }
Exemple #4
0
        /// <summary>
        /// Rebuilds the extraction SQL for the current configuration and dataset and puts it in the query editor,
        /// leaving the editor read only.  Any exception (including a missing cohort) is handed to
        /// <c>CommonFunctionality.ScintillaGoRed</c> together with the editor instead of being thrown.
        /// </summary>
        private void RegenerateCodeInQueryEditor()
        {
            try
            {
                bool hasCohort = _extractionConfiguration.Cohort_ID != null;

                if (!hasCohort)
                {
                    throw new Exception("No cohort has been defined for this ExtractionConfiguration");
                }

                // Only the dataset is needed to preview the extraction SQL, so an otherwise empty bundle is fine
                var datasetOnlyBundle = new ExtractableDatasetBundle(_extractableDataSet);
                _request = new ExtractDatasetCommand(_extractionConfiguration, datasetOnlyBundle);
                _request.GenerateQueryBuilder();

                // unlock the editor only for as long as it takes to swap in the generated SQL
                QueryEditor.ReadOnly = false;
                QueryEditor.Text     = _request.QueryBuilder.SQL;
                QueryEditor.ReadOnly = true;

                CommonFunctionality.ScintillaGoRed(QueryEditor, false);
            }
            catch (Exception ex)
            {
                CommonFunctionality.ScintillaGoRed(QueryEditor, ex);
            }
        }
        /// <summary>
        /// Decides whether a failed extraction should be attempted again.  The decision is delegated to
        /// <c>ApplyRetryWaitStrategy</c> on the command's extraction progress, which may wait before
        /// returning true.  Commands without an extraction progress are never retried.
        /// </summary>
        /// <param name="extractDatasetCommand">The command that failed</param>
        /// <param name="listener">Passed through to the retry strategy</param>
        /// <param name="totalFailureCount">Passed through to the retry strategy</param>
        /// <param name="consecutiveFailureCount">Passed through to the retry strategy</param>
        /// <returns>False when there is no extraction progress; otherwise whatever the retry strategy returns</returns>
        private bool ShouldRetry(ExtractDatasetCommand extractDatasetCommand, IDataLoadEventListener listener, int totalFailureCount, int consecutiveFailureCount)
        {
            var extractionProgress = extractDatasetCommand.SelectedDataSets.ExtractionProgressIfAny;

            return extractionProgress != null
                   && extractionProgress.ApplyRetryWaitStrategy(Token, listener, totalFailureCount, consecutiveFailureCount);
        }
        /// <summary>
        /// Stores <paramref name="collection"/> (which must be an <c>ExtractionAggregateGraphObjectCollection</c>),
        /// builds the extraction request and its query builder for the collection's selected dataset, then sets the
        /// aggregate to the collection's graph and starts loading it.
        /// </summary>
        /// <param name="activator">Passed to <c>SetItemActivator</c> and <c>SetAggregate</c></param>
        /// <param name="collection">The persisted collection describing what to graph</param>
        public void SetCollection(IActivateItems activator, IPersistableObjectCollection collection)
        {
            _collection = (ExtractionAggregateGraphObjectCollection)collection;
            SetItemActivator(activator);

            // the request is built from the configuration and dataset of the selected dataset
            Request = new ExtractDatasetCommand(
                _collection.SelectedDataSets.ExtractionConfiguration,
                new ExtractableDatasetBundle(_collection.SelectedDataSets.ExtractableDataSet));
            Request.GenerateQueryBuilder();

            SetAggregate(activator, _collection.Graph);
            LoadGraphAsync();
        }
Exemple #7
0
        /// <summary>
        /// Extracts the table valued function catalogue with an extraction specific global parameter value of 1
        /// and asserts that a single row comes back whose first column is the release identifier.
        ///
        /// <para>All database objects created here are deleted in a finally block so that a failed assertion
        /// does not leave them behind for later tests.</para>
        /// </summary>
        private void TestDataExportOfTvf()
        {
            var config = new ExtractionConfiguration(DataExportRepository, _project);

            // declared up front so the finally block can delete whatever was created before a failure
            ExtractableDataSet tvfExtractable = null;
            SelectedDataSets selected = null;
            GlobalExtractionFilterParameter globalP = null;

            try
            {
                config.Cohort_ID = DataExportRepository.GetAllObjects<ExtractableCohort>().Single().ID;
                config.SaveToDatabase();

                tvfExtractable = new ExtractableDataSet(DataExportRepository, _tvfCatalogue);

                selected = new SelectedDataSets(DataExportRepository, config, tvfExtractable, null);

                //make all columns part of the extraction
                foreach (ExtractionInformation e in _tvfCatalogue.GetAllExtractionInformation(ExtractionCategory.Any))
                {
                    config.AddColumnToExtraction(tvfExtractable, e);
                }

                //the default value should be 10
                Assert.AreEqual("10", _tvfTableInfo.GetAllParameters().Single().Value);

                //configure an extraction specific global of 1 so that only 1 chi number is fetched (which will be in the cohort)
                globalP = new GlobalExtractionFilterParameter(DataExportRepository, config, "DECLARE @numberOfRecords AS int;");

                globalP.Value = "1";
                globalP.SaveToDatabase();

                var extractionCommand = new ExtractDatasetCommand(config, new ExtractableDatasetBundle(tvfExtractable));

                var source = new ExecuteDatasetExtractionSource();

                source.PreInitialize(extractionCommand, new ThrowImmediatelyDataLoadEventListener());

                var dt = source.GetChunk(new ThrowImmediatelyDataLoadEventListener(), new GracefulCancellationToken());

                Assert.AreEqual(1, dt.Rows.Count);

                Assert.AreEqual("ReleaseId", dt.Columns[0].ColumnName);

                //should be a guid
                var releaseId = dt.Rows[0][0].ToString();
                Assert.IsTrue(releaseId.Length > 10);
                Assert.IsTrue(releaseId.Contains("-"));
            }
            finally
            {
                // same deletion order as the original success path
                if (selected != null)
                {
                    selected.DeleteInDatabase();
                }

                if (globalP != null)
                {
                    globalP.DeleteInDatabase();
                }

                config.DeleteInDatabase();

                if (tvfExtractable != null)
                {
                    tvfExtractable.DeleteInDatabase();
                }
            }
        }
Exemple #8
0
        /// <summary>
        /// Runs the base set up, creates the catalogue and data export fixtures, assigns the cohort to the
        /// configuration and builds the <see cref="ExtractDatasetCommand"/> stored in <c>_request</c>.
        /// </summary>
        protected override void SetUp()
        {
            base.SetUp();

            SetupCatalogueConfigurationEtc();
            SetupDataExport();

            _configuration.Cohort_ID = _extractableCohort.ID;
            _configuration.SaveToDatabase();

            // the pieces of the request, built in the same order the constructor call used to evaluate them
            var bundle    = new ExtractableDatasetBundle(_extractableDataSet);
            var salt      = new HICProjectSalt(_project);
            var directory = new ExtractionDirectory(@"C:\temp\", _configuration);

            _request = new ExtractDatasetCommand(_configuration, _extractableCohort, bundle, _extractableColumns, salt, directory);
        }
Exemple #9
0
        /// <summary>
        /// Stores <paramref name="request"/> in <c>Request</c> and prepares the state used while extracting it:
        /// stopwatches, sorted columns, the query builder (generated only if the request has none), the release
        /// identifier column names, and the optional validator and time coverage aggregator.
        /// </summary>
        /// <param name="request">The command to extract.  <c>ExtractDatasetCommand.EmptyCommand</c> is stored but nothing else is prepared for it</param>
        protected virtual void Initialize(ExtractDatasetCommand request)
        {
            Request = request;

            // the empty command has nothing to prepare
            if (request == ExtractDatasetCommand.EmptyCommand)
            {
                return;
            }

            _timeSpentValidating          = new Stopwatch();
            _timeSpentCalculatingDISTINCT = new Stopwatch();
            _timeSpentBuckettingDates     = new Stopwatch();

            //ensure the columns are in the right order so we can record the release identifiers
            Request.ColumnsToExtract.Sort();

            //generate the query builder only when the request does not already hold one
            if (request.QueryBuilder == null)
            {
                request.GenerateQueryBuilder();
            }

            //remember the runtime name of every release identifier column
            foreach (ReleaseIdentifierSubstitution releaseIdentifier in Request.ReleaseIdentifierSubstitutions)
            {
                _extractionIdentifiersidx.Add(releaseIdentifier.GetRuntimeName());
            }

            UniqueReleaseIdentifiersEncountered = new HashSet<object>();

            _catalogue = request.Catalogue;

            //validation only applies when the catalogue has validator XML
            bool hasValidatorXml = !string.IsNullOrWhiteSpace(_catalogue.ValidatorXML);

            if (hasValidatorXml)
            {
                ExtractionTimeValidator = new ExtractionTimeValidator(_catalogue, request.ColumnsToExtract);
            }

            //there must be a time periodicity ExtractionInformation AND it must be among the columns selected for extraction
            bool timeCoverageColumnIsExtracted =
                _catalogue.TimeCoverage_ExtractionInformation_ID != null
                && request.ColumnsToExtract
                          .Cast<ExtractableColumn>()
                          .Any(c => c.CatalogueExtractionInformation_ID == _catalogue.TimeCoverage_ExtractionInformation_ID);

            ExtractionTimeTimeCoverageAggregator = timeCoverageColumnIsExtracted
                ? new ExtractionTimeTimeCoverageAggregator(_catalogue, request.ExtractableCohort)
                : null;
        }
Exemple #10
0
        /// <summary>
        /// Runs the base one time set up, points <c>ProjectDirectory</c> at a "TestProject" folder under the test
        /// work directory, creates the catalogue and data export fixtures, assigns the cohort to the configuration
        /// and builds the <see cref="ExtractDatasetCommand"/> stored in <c>_request</c>.
        /// </summary>
        protected override void OneTimeSetUp()
        {
            base.OneTimeSetUp();

            var workDirectory = TestContext.CurrentContext.WorkDirectory;
            ProjectDirectory = Path.Combine(workDirectory, "TestProject");

            SetupCatalogueConfigurationEtc();
            SetupDataExport();

            _configuration.Cohort_ID = _extractableCohort.ID;
            _configuration.SaveToDatabase();

            // the pieces of the request, built in the same order the constructor call used to evaluate them
            var bundle    = new ExtractableDatasetBundle(_extractableDataSet);
            var salt      = new HICProjectSalt(_project);
            var directory = new ExtractionDirectory(ProjectDirectory, _configuration);

            _request = new ExtractDatasetCommand(_configuration, _extractableCohort, bundle, _extractableColumns, salt, directory);
        }
        /// <summary>
        /// Checks that <paramref name="goodString"/> is accepted as an extraction subdirectory pattern and that
        /// the directory resolved for a command matches <paramref name="pattern"/>.
        /// </summary>
        /// <param name="goodString">The subdirectory pattern given to the flat file destination</param>
        /// <param name="pattern">Regular expression the resolved directory path (with forward slashes) must match</param>
        public void Test_ValidPaths(string goodString, string pattern)
        {
            var selectedDataSets = WhenIHaveA<SelectedDataSets>();

            // fixed names so the resolved path is predictable
            selectedDataSets.ExtractionConfiguration.Project.ExtractionDirectory = TestContext.CurrentContext.WorkDirectory;
            selectedDataSets.ExtractionConfiguration.Name         = "AAA";
            selectedDataSets.ExtractableDataSet.Catalogue.Name    = "BBB";
            selectedDataSets.ExtractableDataSet.Catalogue.Acronym = "C";

            var bundle      = new ExtractableDatasetBundle(selectedDataSets.ExtractableDataSet);
            var command     = new ExtractDatasetCommand(selectedDataSets.ExtractionConfiguration, bundle);
            var destination = new ExecuteDatasetExtractionFlatFileDestination();

            destination.ExtractionSubdirectoryPattern = goodString;
            Assert.DoesNotThrow(() => destination.Check(new ThrowImmediatelyCheckNotifier()));

            var resolvedDirectory = destination.GetDirectoryFor(command);

            // compare with forward slashes so the pattern does not depend on the path separator
            var normalisedPath = resolvedDirectory.FullName.Replace('\\', '/');
            StringAssert.IsMatch(pattern, normalisedPath);
        }
Exemple #12
0
        /// <summary>
        /// Builds the extraction SQL as it would be generated today for <c>Configuration</c> and <c>DataSet</c>.
        /// Also refreshes <c>_columnsToExtract</c> with the currently configured columns, sorted.
        /// </summary>
        /// <returns>The SQL of the freshly generated query builder</returns>
        private string GetCurrentConfigurationSQL()
        {
            //the cohort the configuration points at
            var currentCohort = _repository.GetObjectByID<ExtractableCohort>((int)Configuration.Cohort_ID);

            //the columns that are configured today
            _columnsToExtract = new List<IColumn>(Configuration.GetAllExtractableColumnsFor(DataSet));
            _columnsToExtract.Sort();

            //the salt of the owning project
            var projectSalt = new HICProjectSalt(_repository.GetObjectByID<Project>(Configuration.Project_ID));

            //an empty bundle - only the dataset - is enough to generate the SQL
            var datasetOnlyBundle = new ExtractableDatasetBundle(DataSet);
            var liveRequest = new ExtractDatasetCommand(Configuration, currentCohort, datasetOnlyBundle, _columnsToExtract, projectSalt, null);

            liveRequest.GenerateQueryBuilder();

            return liveRequest.QueryBuilder.SQL;
        }
        /// <summary>
        /// Adds a transformed "YearOfBirth" column to the extraction, runs the extraction to a database and asserts
        /// on the single row and the column types of the destination table.
        ///
        /// <para>Clean up drops the destination database only if it exists, so that a run which failed before
        /// the database was created reports its own error rather than a failed drop.</para>
        /// </summary>
        public void SQLServerDestination()
        {
            DiscoveredDatabase dbToExtractTo = null;

            var ci = new CatalogueItem(CatalogueRepository, _catalogue, "YearOfBirth");
            var columnToTransform = _columnInfos.Single(c => c.GetRuntimeName().Equals("DateOfBirth", StringComparison.CurrentCultureIgnoreCase));

            string transform = "YEAR(" + columnToTransform.Name + ")";

            var ei = new ExtractionInformation(CatalogueRepository, ci, columnToTransform, transform);

            ei.Alias = "YearOfBirth";
            ei.ExtractionCategory = ExtractionCategory.Core;
            ei.SaveToDatabase();

            //make it part of the ExtractionConfiguration
            var newColumn = new ExtractableColumn(DataExportRepository, _selectedDataSet.ExtractableDataSet, (ExtractionConfiguration)_selectedDataSet.ExtractionConfiguration, ei, 0, ei.SelectSQL);

            newColumn.Alias = ei.Alias;
            newColumn.SaveToDatabase();

            _extractableColumns.Add(newColumn);

            //recreate the extraction command so it gets updated with the new column too.
            _request = new ExtractDatasetCommand(_configuration, _extractableCohort, new ExtractableDatasetBundle(_extractableDataSet),
                                                 _extractableColumns, new HICProjectSalt(_project),
                                                 new ExtractionDirectory(@"C:\temp\", _configuration));

            try
            {
                _configuration.Name = "ExecuteFullExtractionToDatabaseMSSqlDestinationTest";
                _configuration.SaveToDatabase();

                ExtractionPipelineUseCase            execute;
                IExecuteDatasetExtractionDestination result;

                var dbname = TestDatabaseNames.GetConsistentName(_project.Name + "_" + _project.ProjectNumber);
                dbToExtractTo = DiscoveredServerICanCreateRandomDatabasesAndTablesOn.ExpectDatabase(dbname);

                //start from a clean slate if a previous run left the database behind
                if (dbToExtractTo.Exists())
                {
                    dbToExtractTo.Drop();
                }

                base.Execute(out execute, out result);

                var destinationTable = dbToExtractTo.ExpectTable(_expectedTableName);
                Assert.IsTrue(destinationTable.Exists());

                var dt = destinationTable.GetDataTable();

                Assert.AreEqual(1, dt.Rows.Count);
                Assert.AreEqual(_cohortKeysGenerated[_cohortKeysGenerated.Keys.First()].Trim(), dt.Rows[0]["ReleaseID"]);
                Assert.AreEqual(new DateTime(2001, 1, 1), dt.Rows[0]["DateOfBirth"]);
                Assert.AreEqual(2001, dt.Rows[0]["YearOfBirth"]);

                Assert.AreEqual(columnToTransform.Data_type, destinationTable.DiscoverColumn("DateOfBirth").DataType.SQLType);
                Assert.AreEqual("int", destinationTable.DiscoverColumn("YearOfBirth").DataType.SQLType);
            }
            finally
            {
                if (_extractionServer != null)
                {
                    _extractionServer.DeleteInDatabase();
                }

                //only drop what is actually there: if the extraction failed before creating the database,
                //an unconditional Drop would throw here and hide the original failure
                if (dbToExtractTo != null && dbToExtractTo.Exists())
                {
                    dbToExtractTo.Drop();
                }
            }
        }
Exemple #14
0
        /// <summary>
        /// Deploys a parameterised filter into the selected dataset's root container, asserts on the extraction SQL
        /// that results, then asserts that a deep clone of the configuration generates the same SQL.
        /// The clone and the master filter are deleted afterwards.
        /// </summary>
        /// <param name="introduceOrphanExtractionInformation">When true, <c>IntroduceOrphan()</c> is called before anything else</param>
        public void CloneWithFilters(bool introduceOrphanExtractionInformation)
        {
            if (introduceOrphanExtractionInformation)
            {
                IntroduceOrphan();
            }

            Assert.IsEmpty(_configuration.ReleaseLog);

            var masterFilter = new ExtractionFilter(CatalogueRepository, "FilterByFish", _extractionInformations[0]);

            try
            {
                //give the master filter a parameter
                masterFilter.WhereSQL = "Fish = @fish";

                var parameterCreator = new ParameterCreator(new ExtractionFilterFactory(_extractionInformations[0]), null, null);
                parameterCreator.CreateAll(masterFilter, null);
                masterFilter.SaveToDatabase();

                Assert.IsTrue(masterFilter.ExtractionFilterParameters.Count() == 1);

                //the root container of the selected dataset
                var rootContainer = new FilterContainer(DataExportRepository);
                _selectedDataSet.RootFilterContainer_ID = rootContainer.ID;
                _selectedDataSet.SaveToDatabase();

                //deploy a copy of the master filter into that container
                var filterImporter = new FilterImporter(new DeployedExtractionFilterFactory(DataExportRepository), null);
                var deployedFilter = (DeployedExtractionFilter)filterImporter.ImportFilter(masterFilter, null);
                deployedFilter.FilterContainer_ID = rootContainer.ID;
                deployedFilter.Name = "FilterByFishDeployed";
                deployedFilter.SaveToDatabase();

                var fishParameter = deployedFilter.ExtractionFilterParameters[0];
                fishParameter.Value = "'jormungander'";
                fishParameter.SaveToDatabase();

                var originalRequest = new ExtractDatasetCommand(_configuration, new ExtractableDatasetBundle(_extractableDataSet));
                originalRequest.GenerateQueryBuilder();

                //{0} is the test database name prefix
                var expectedSql = string.Format(
                    @"DECLARE @fish AS varchar(50);
SET @fish='jormungander';
/*The ID of the cohort in [{0}CohortDatabase]..[Cohort]*/
DECLARE @CohortDefinitionID AS int;
SET @CohortDefinitionID=-599;
/*The project number of project {0}ExtractionConfiguration*/
DECLARE @ProjectNumber AS int;
SET @ProjectNumber=1;

SELECT DISTINCT 
[{0}CohortDatabase]..[Cohort].[ReleaseID] AS ReleaseID,
[{0}ScratchArea].[dbo].[TestTable].[Name],
[{0}ScratchArea].[dbo].[TestTable].[DateOfBirth]
FROM 
[{0}ScratchArea].[dbo].[TestTable] INNER JOIN [{0}CohortDatabase]..[Cohort] ON [{0}ScratchArea].[dbo].[TestTable].[PrivateID]=[{0}CohortDatabase]..[Cohort].[PrivateID]

WHERE
(
/*FilterByFishDeployed*/
Fish = @fish
)
AND
[{0}CohortDatabase]..[Cohort].[cohortDefinition_id]=-599
"
                    , TestDatabaseNames.Prefix);

                Assert.AreEqual(CollapseWhitespace(expectedSql), CollapseWhitespace(originalRequest.QueryBuilder.SQL));

                //the clone must keep the cohort but get its own ID
                ExtractionConfiguration clonedConfiguration = _configuration.DeepCloneWithNewIDs();
                Assert.AreEqual(clonedConfiguration.Cohort_ID, _configuration.Cohort_ID);
                Assert.AreNotEqual(clonedConfiguration.ID, _configuration.ID);

                try
                {
                    var clonedRequest = new ExtractDatasetCommand(clonedConfiguration, new ExtractableDatasetBundle(_extractableDataSet));
                    clonedRequest.GenerateQueryBuilder();

                    Assert.AreEqual(originalRequest.QueryBuilder.SQL, clonedRequest.QueryBuilder.SQL);
                }
                finally
                {
                    clonedConfiguration.DeleteInDatabase();
                }
            }
            finally
            {
                masterFilter.DeleteInDatabase();
            }
        }
Exemple #15
0
        /// <summary>
        /// Checks the <see cref="SelectedDataSet"/> and reports success/failures to the <paramref name="notifier"/>
        ///
        /// <para>In order, this: confirms columns are selected, resolves the Catalogue, builds a TOP 1 extraction
        /// request and its query, checks the dataset server and cohort server exist, reports duplicate column names
        /// and orders, warns about out of sync and unselected columns, runs the TOP 1 query when the cohort and
        /// dataset share a server, checks supporting documents and SQL tables, and finally (when a pipeline was
        /// supplied) checks the extraction pipeline engine.</para>
        ///
        /// <para>Several failures end the method early (no columns, no Catalogue, SQL cannot be generated, cohort
        /// server missing, query cannot be assembled); the rest are reported and checking continues.</para>
        /// </summary>
        /// <param name="notifier">Receives one <see cref="CheckEventArgs"/> per check performed</param>
        public void Check(ICheckNotifier notifier)
        {
            var       ds      = SelectedDataSet.ExtractableDataSet;
            var       config  = SelectedDataSet.ExtractionConfiguration;
            var       cohort  = config.Cohort;
            var       project = config.Project;
            // assigned to DbCommand.CommandTimeout for the TOP 1 query below (that property is measured in seconds)
            const int timeout = 5;

            notifier.OnCheckPerformed(new CheckEventArgs("Inspecting dataset " + ds, CheckResult.Success));

            var selectedcols = new List <IColumn>(config.GetAllExtractableColumnsFor(ds));

            // nothing else can be checked without at least one selected column
            if (!selectedcols.Any())
            {
                notifier.OnCheckPerformed(
                    new CheckEventArgs(
                        "Dataset " + ds + " in configuration '" + config + "' has no selected columns",
                        CheckResult.Fail));

                return;
            }

            ICatalogue cata;

            // fetching the Catalogue can throw, in which case the failure is reported and checking stops
            try
            {
                cata = ds.Catalogue;
            }
            catch (Exception e)
            {
                notifier.OnCheckPerformed(new CheckEventArgs("Unable to find Catalogue for ExtractableDataSet", CheckResult.Fail, e));
                return;
            }

            // reported as a failure but checking carries on (there is no return here)
            if (cata.IsInternalDataset)
            {
                notifier.OnCheckPerformed(new CheckEventArgs($"Dataset '{ds}' is marked {nameof(ICatalogue.IsInternalDataset)} so should not be extracted", CheckResult.Fail));
            }

            // the request is limited to a single row (TopX = 1) since it is only used for checking
            var request = new ExtractDatasetCommand(config, cohort, new ExtractableDatasetBundle(ds),
                                                    selectedcols, new HICProjectSalt(project), new ExtractionDirectory(project.ExtractionDirectory, config))
            {
                TopX = 1
            };

            try
            {
                request.GenerateQueryBuilder();
            }
            catch (Exception e)
            {
                notifier.OnCheckPerformed(
                    new CheckEventArgs(
                        "Could not generate valid extraction SQL for dataset " + ds +
                        " in configuration " + config, CheckResult.Fail, e));
                return;
            }

            // the dataset server's existence is reported either way; a missing server does not stop the checks
            var  server       = request.GetDistinctLiveDatabaseServer();
            bool serverExists = server.Exists();

            notifier.OnCheckPerformed(new CheckEventArgs("Server " + server + " Exists:" + serverExists,
                                                         serverExists ? CheckResult.Success : CheckResult.Fail));

            var cohortServer = request.ExtractableCohort.ExternalCohortTable.Discover();

            if (cohortServer == null || !cohortServer.Exists())
            {
                notifier.OnCheckPerformed(new CheckEventArgs("Cohort server did not exist or was unreachable", CheckResult.Fail));
                return;
            }

            //when 2+ columns have the same Name it's a problem
            foreach (IGrouping <string, IColumn> grouping in request.ColumnsToExtract.GroupBy(c => c.GetRuntimeName()).Where(g => g.Count() > 1))
            {
                notifier.OnCheckPerformed(new CheckEventArgs($"There are { grouping.Count() } columns in the extract ({request.DatasetBundle?.DataSet}) called '{ grouping.Key }'", CheckResult.Fail));
            }

            //when 2+ columns have the same Order it's a problem (presumably because their relative position in the extract becomes ambiguous - TODO confirm)
            foreach (IGrouping <int, IColumn> grouping in request.ColumnsToExtract.GroupBy(c => c.Order).Where(g => g.Count() > 1))
            {
                notifier.OnCheckPerformed(new CheckEventArgs($"There are { grouping.Count() } columns in the extract ({request.DatasetBundle?.DataSet}) that share the same Order '{ grouping.Key }'", CheckResult.Fail));
            }

            // Warn user if stuff is out of sync with the Catalogue version (changes have happened to the master but not propagated to the copy in this extraction)
            var outOfSync = selectedcols.OfType <ExtractableColumn>().Where(c => c.IsOutOfSync()).ToArray();

            if (outOfSync.Any())
            {
                notifier.OnCheckPerformed(new CheckEventArgs($"'{ds}' columns out of sync with CatalogueItem version(s): { Environment.NewLine + string.Join(',', outOfSync.Select(o => o.ToString() + Environment.NewLine)) }" +
                                                             $"{ Environment.NewLine } Extraction Configuration: '{config}' ", CheckResult.Warning));
            }

            // Core and ProjectSpecific columns (other than extraction identifiers) that no selected column points at
            var nonSelectedCore = cata.GetAllExtractionInformation(ExtractionCategory.Core)
                                  .Union(cata.GetAllExtractionInformation(ExtractionCategory.ProjectSpecific))
                                  .Where(ei => !ei.IsExtractionIdentifier &&
                                         !selectedcols.OfType <ExtractableColumn>().Any(ec => ec.CatalogueExtractionInformation_ID == ei.ID))
                                  .ToArray();

            if (nonSelectedCore.Any())
            {
                notifier.OnCheckPerformed(new CheckEventArgs($"'{ds}' Core columns not selected for extractions: { Environment.NewLine + string.Join(',', nonSelectedCore.Select(o => o.ToString() + Environment.NewLine)) }" +
                                                             $"{ Environment.NewLine } Extraction Configuration: '{config}' ", CheckResult.Warning));
            }

            //Make sure cohort and dataset are on same server before checking (can still get around this at runtime by using ExecuteCrossServerDatasetExtractionSource)
            if (!cohortServer.Server.Name.Equals(server.Name, StringComparison.CurrentCultureIgnoreCase) || !cohortServer.Server.DatabaseType.Equals(server.DatabaseType))
            {
                // different server or database type: only a warning, and the TOP 1 query is skipped
                notifier.OnCheckPerformed(new CheckEventArgs(
                                              $"Cohort is on server '{cohortServer.Server.Name}' ({cohortServer.Server.DatabaseType}) but dataset '{request.DatasetBundle?.DataSet}' is on '{server.Name}' ({server.DatabaseType})"
                                              , CheckResult.Warning));
            }
            else
            {
                //Try to fetch TOP 1 data
                try
                {
                    using (var con = server.BeginNewTransactedConnection())
                    {
                        //the query runs on a transacted connection that is abandoned below, in case the user somehow manages to write a filter/transform that nukes data or something

                        DbCommand cmd;

                        try
                        {
                            cmd = server.GetCommand(request.QueryBuilder.SQL, con);
                            cmd.CommandTimeout = timeout;
                            notifier.OnCheckPerformed(
                                new CheckEventArgs(
                                    "/*About to send Request SQL :*/" + Environment.NewLine + request.QueryBuilder.SQL,
                                    CheckResult.Success));
                        }
                        catch (QueryBuildingException e)
                        {
                            // this return leaves the using block without reaching AbandonAndCloseConnection below
                            notifier.OnCheckPerformed(new CheckEventArgs("Failed to assemble query for dataset " + ds,
                                                                         CheckResult.Fail, e));
                            return;
                        }

                        try
                        {
                            using (var r = cmd.ExecuteReader())
                            {
                                // a single successful Read is enough; an empty result is only a warning
                                if (r.Read())
                                {
                                    notifier.OnCheckPerformed(new CheckEventArgs("Read at least 1 row successfully from dataset " + ds,
                                                                                 CheckResult.Success));
                                }
                                else
                                {
                                    notifier.OnCheckPerformed(new CheckEventArgs("Dataset " + ds + " is completely empty (when linked with the cohort). " +
                                                                                 "Extraction may fail if the Source does not allow empty extractions",
                                                                                 CheckResult.Warning));
                                }
                            }
                        }
                        catch (Exception e)
                        {
                            // timeouts are reported through their own error code; anything else is a plain failure
                            if (server.GetQuerySyntaxHelper().IsTimeout(e))
                            {
                                notifier.OnCheckPerformed(new CheckEventArgs(ErrorCodes.ExtractTimeoutChecking, e, timeout));
                            }
                            else
                            {
                                notifier.OnCheckPerformed(new CheckEventArgs("Failed to execute the query (See below for query)", CheckResult.Fail, e));
                            }
                        }

                        con.ManagedTransaction.AbandonAndCloseConnection();
                    }
                }
                catch (Exception e)
                {
                    notifier.OnCheckPerformed(new CheckEventArgs("Failed to execute Top 1 on dataset " + ds, CheckResult.Fail, e));
                }
            }

            // _checkGlobals decides whether global supporting documents/tables are checked as well as the local ones
            var fetchOptions = _checkGlobals ? FetchOptions.ExtractableGlobalsAndLocals : FetchOptions.ExtractableLocals;

            foreach (var supportingDocument in cata.GetAllSupportingDocuments(fetchOptions))
            {
                new SupportingDocumentsFetcher(supportingDocument).Check(notifier);
            }

            //check the catalogue's supporting SQL tables (same fetch options as the documents above)
            foreach (SupportingSQLTable table in cata.GetAllSupportingSQLTablesForCatalogue(fetchOptions))
            {
                new SupportingSQLTableChecker(table).Check(notifier);
            }

            // optionally check the pipeline engine that would run this request
            if (_alsoCheckPipeline != null)
            {
                var engine = new ExtractionPipelineUseCase(_activator, request.Project, request, _alsoCheckPipeline, DataLoadInfo.Empty)
                             .GetEngine(_alsoCheckPipeline, new FromCheckNotifierToDataLoadEventListener(notifier));
                engine.Check(notifier);
            }
        }
Exemple #16
0
        /// <summary>
        /// Builds the <see cref="QueryBuilder"/> that retrieves the columns selected for extraction in
        /// <paramref name="request"/> (including any JOINs) and links them to the cohort.
        ///
        /// <para>Every column marked IsExtractionIdentifier is removed from <c>request.ColumnsToExtract</c> and is
        /// represented in the query by a <see cref="ReleaseIdentifierSubstitution"/> instead.  The cohort table is added
        /// as an INNER JOIN after any other joins and the cohort's WhereSQL is added as a WHERE line.</para>
        ///
        /// <para>The finished builder is stored in <c>request.QueryBuilder</c>, so this method can only be called once
        /// per request.</para>
        /// </summary>
        /// <param name="request">The extraction command to build SQL for; its ColumnsToExtract and QueryBuilder are modified</param>
        /// <param name="substitutions">Receives one substitution per extraction identifier column found in the request</param>
        /// <returns>The builder that was also stored in <c>request.QueryBuilder</c></returns>
        /// <exception cref="Exception">The request already has a QueryBuilder, has no columns or has no extraction identifier column</exception>
        /// <exception cref="NullReferenceException">The request has no cohort</exception>
        /// <exception cref="NotSupportedException">The catalogue could not report which database type it is hosted on</exception>
        public QueryBuilder GetSQLCommandForFullExtractionSet(ExtractDatasetCommand request, out List <ReleaseIdentifierSubstitution> substitutions)
        {
            if (request.QueryBuilder != null)
            {
                throw new Exception("Creation of a QueryBuilder from a request can only happen once, to access the results of the creation use the cached answer in the request.QueryBuilder property");
            }

            if (!request.ColumnsToExtract.Any())
            {
                throw new Exception("No columns are marked for extraction in this configuration");
            }

            // exception type kept as-is because callers may already catch it
            if (request.ExtractableCohort == null)
            {
                throw new NullReferenceException("No Cohort selected");
            }

            var databaseType = request.Catalogue.GetDistinctLiveDatabaseServerType();

            if (databaseType == null)
            {
                throw new NotSupportedException("Catalogue " + request.Catalogue + " did not know what DatabaseType it hosted, how can we extract from it! does it have no TableInfos?");
            }

            var syntaxHelper = new QuerySyntaxHelperFactory().Create(databaseType.Value);

            substitutions = new List <ReleaseIdentifierSubstitution>();

            var memoryRepository = new MemoryRepository();

            // find the extraction identifier column(s) once instead of re-scanning the column list for every use
            List <IColumn> identifierColumns = request.ColumnsToExtract.Where(c => c.IsExtractionIdentifier).Cast <IColumn>().ToList();

            if (identifierColumns.Count == 0)
            {
                throw new Exception("There are no Columns in this dataset (" + request + ") marked as IsExtractionIdentifier");
            }

            // one identifier:       e.g. CHI X died on date Y with conditions a,b and c
            // multiple identifiers: e.g. Mother X had Babies A, B, C where A,B and C are all CHIs that must be subbed for ProCHIs
            bool hasMultipleIdentifiers = identifierColumns.Count > 1;

            foreach (IColumn identifierColumn in identifierColumns)
            {
                substitutions.Add(new ReleaseIdentifierSubstitution(memoryRepository, identifierColumn, request.ExtractableCohort, hasMultipleIdentifiers, syntaxHelper));
            }

            string hashingAlgorithm = _repository.DataExportPropertyManager.GetValue(DataExportProperty.HashingAlgorithmPattern);

            // a blank pattern is passed to the builder as null
            if (string.IsNullOrWhiteSpace(hashingAlgorithm))
            {
                hashingAlgorithm = null;
            }

            //identify any tables we are supposed to force join to
            var forcedJoins = request.SelectedDataSets.SelectedDataSetsForcedJoins;

            QueryBuilder queryBuilder = new QueryBuilder("DISTINCT ", hashingAlgorithm, forcedJoins.Select(s => s.TableInfo).ToArray());

            queryBuilder.TopX = request.TopX;

            queryBuilder.SetSalt(request.Salt.GetSalt());

            //add the constant parameters
            foreach (ConstantParameter parameter in GetConstantParameters(syntaxHelper, request.Configuration, request.ExtractableCohort))
            {
                queryBuilder.ParameterManager.AddGlobalParameter(parameter);
            }

            //add the global parameters
            foreach (var globalExtractionFilterParameter in request.Configuration.GlobalExtractionFilterParameters)
            {
                queryBuilder.ParameterManager.AddGlobalParameter(globalExtractionFilterParameter);
            }

            //remove the identification column(s) from the request, the substitutions take their place
            request.ColumnsToExtract.RemoveAll(c => c.IsExtractionIdentifier);

            //add in the ReleaseIdentifier in place of the identification column
            queryBuilder.AddColumnRange(substitutions.ToArray());

            //add the rest of the columns to the query
            queryBuilder.AddColumnRange(request.ColumnsToExtract.Cast <IColumn>().ToArray());

            //add the users selected filters
            queryBuilder.RootFilterContainer = request.Configuration.GetFilterContainerFor(request.DatasetBundle.DataSet);

            // the cohort is known to be non-null here (checked at the top of the method)
            ExternalCohortTable externalCohortTable = _repository.GetObjectByID <ExternalCohortTable>(request.ExtractableCohort.ExternalCohortTable_ID);

            //the JOIN with the cohort table: with several identifiers a row links to the cohort if ANY of them match
            string cohortJoin = " INNER JOIN " + externalCohortTable.TableName + " ON " + string.Join(" OR ", substitutions.Select(s => s.JoinSQL));

            //add the JOIN in after any other joins
            queryBuilder.AddCustomLine(cohortJoin, QueryComponent.JoinInfoJoin);

            //add the cohort ID filter: all cohorts live in one combined table so we need to say Select XX from XX join Cohort Where Cohort number = Y
            queryBuilder.AddCustomLine(request.ExtractableCohort.WhereSQL(), QueryComponent.WHERE);

            request.QueryBuilder = queryBuilder;
            return(queryBuilder);
        }
        /// <summary>
        /// Extracts the dataset with one extra column ("SuperSecretThing") taken from the project specific catalogue
        /// and joined in through a LEFT join, then checks that the column and its values appear in the extracted CSV.
        /// </summary>
        public void Extract_ProjectSpecificCatalogue_AppendedColumn()
        {
            //make the catalogue a custom catalogue for this project
            CustomExtractableDataSet.Project_ID = _project.ID;
            CustomExtractableDataSet.SaveToDatabase();

            var pipe = SetupPipeline();

            pipe.Name = "Extract_ProjectSpecificCatalogue_AppendedColumn Pipe";
            pipe.SaveToDatabase();

            var extraColumn   = CustomCatalogue.GetAllExtractionInformation(ExtractionCategory.ProjectSpecific).Single(e => e.GetRuntimeName().Equals("SuperSecretThing"));
            var asExtractable = new ExtractableColumn(DataExportRepository, _extractableDataSet, _configuration, extraColumn, 10, extraColumn.SelectSQL);

            try
            {
                //get rid of any lingering joins
                foreach (JoinInfo j in CatalogueRepository.GetAllObjects <JoinInfo>())
                {
                    j.DeleteInDatabase();
                }

                //add the ability to join the two tables in the query
                var idCol      = _extractableDataSet.Catalogue.GetAllExtractionInformation(ExtractionCategory.Core).Single(c => c.IsExtractionIdentifier).ColumnInfo;
                var otherIdCol = CustomCatalogue.GetAllExtractionInformation(ExtractionCategory.ProjectSpecific).Single(e => e.GetRuntimeName().Equals("PrivateID")).ColumnInfo;

                new JoinInfo(CatalogueRepository, idCol, otherIdCol, ExtractionJoinType.Left, null);

                //generate a new request (this will include the newly created column)
                _request = new ExtractDatasetCommand(_configuration, new ExtractableDatasetBundle(_extractableDataSet));

                var tbl = Database.ExpectTable("TestTable");

                tbl.Truncate();

                using (var blk = tbl.BeginBulkInsert())
                {
                    var dt = new DataTable();
                    dt.Columns.Add("PrivateID");
                    dt.Columns.Add("Name");
                    dt.Columns.Add("DateOfBirth");

                    dt.Rows.Add(new object[] { "Priv_12345", "Bob", "2001-01-01" });
                    dt.Rows.Add(new object[] { "Priv_wtf11", "Frank", "2001-10-29" });
                    blk.Upload(dt);
                }

                ExtractionPipelineUseCase            useCase;
                IExecuteDatasetExtractionDestination results;

                Execute(out useCase, out results);

                var mainDataTableCsv = results.DirectoryPopulated.GetFiles().Single(f => f.Name.Equals("TestTable.csv"));

                Assert.IsNotNull(mainDataTableCsv);
                Assert.AreEqual("TestTable.csv", mainDataTableCsv.Name);

                var lines = File.ReadAllLines(mainDataTableCsv.FullName);

                //the appended column comes last in the header
                Assert.AreEqual("ReleaseID,Name,DateOfBirth,SuperSecretThing", lines[0]);

                var bobLine   = lines.Single(l => l.StartsWith("Pub_54321,Bob"));
                var frankLine = lines.Single(l => l.StartsWith("Pub_11ftw,Frank"));

                Assert.AreEqual("Pub_54321,Bob,2001-01-01,monkeys can all secretly fly", bobLine);
                Assert.AreEqual("Pub_11ftw,Frank,2001-10-29,the wizard of OZ was a man behind a machine", frankLine);
            }
            finally
            {
                //always remove the extra column, even when the extraction or an assertion fails,
                //so that it does not linger in the configuration
                asExtractable.DeleteInDatabase();
            }
        }
Exemple #18
0
        /// <summary>
        /// Checks the <see cref="SelectedDataSet"/> and reports success/failures to the <paramref name="notifier"/>.
        ///
        /// <para>In order, this checks that columns are selected, that extraction SQL can be generated, that the dataset
        /// server and cohort server exist, that no two extracted columns share a runtime name and (only when cohort and
        /// dataset are on the same server) that a TOP 1 query can be executed.  Afterwards the catalogue's supporting
        /// documents and supporting SQL tables are checked and, if a pipeline was supplied, the extraction pipeline too.</para>
        ///
        /// <para>The method stops early when there are no columns, when SQL generation fails, when the cohort server
        /// cannot be reached or when the query cannot be assembled.</para>
        /// </summary>
        /// <param name="notifier">Receives an event for each check performed</param>
        public void Check(ICheckNotifier notifier)
        {
            var ds      = SelectedDataSet.ExtractableDataSet;
            var config  = SelectedDataSet.ExtractionConfiguration;
            var cohort  = config.Cohort;
            var project = config.Project;

            // command timeout (in seconds) for the TOP 1 test query
            const int timeout = 5;

            notifier.OnCheckPerformed(new CheckEventArgs("Inspecting dataset " + ds, CheckResult.Success));

            var selectedcols = new List <IColumn>(config.GetAllExtractableColumnsFor(ds));

            if (!selectedcols.Any())
            {
                notifier.OnCheckPerformed(
                    new CheckEventArgs(
                        "Dataset " + ds + " in configuration '" + config + "' has no selected columns",
                        CheckResult.Fail));

                return;
            }

            // TopX = 1 because we only want to prove that the query runs, not fetch the data
            var request = new ExtractDatasetCommand(config, cohort, new ExtractableDatasetBundle(ds),
                                                    selectedcols, new HICProjectSalt(project), new ExtractionDirectory(project.ExtractionDirectory, config))
            {
                TopX = 1
            };

            try
            {
                request.GenerateQueryBuilder();
            }
            catch (Exception e)
            {
                notifier.OnCheckPerformed(
                    new CheckEventArgs(
                        "Could not generate valid extraction SQL for dataset " + ds +
                        " in configuration " + config, CheckResult.Fail, e));
                return;
            }

            var  server       = request.GetDistinctLiveDatabaseServer();
            bool serverExists = server.Exists();

            notifier.OnCheckPerformed(new CheckEventArgs("Server " + server + " Exists:" + serverExists,
                                                         serverExists ? CheckResult.Success : CheckResult.Fail));

            var cohortServer = request.ExtractableCohort.ExternalCohortTable.Discover();

            if (cohortServer == null || !cohortServer.Exists())
            {
                notifier.OnCheckPerformed(new CheckEventArgs("Cohort server did not exist or was unreachable", CheckResult.Fail));
                return;
            }

            // report every runtime name that is used by more than one extracted column
            foreach (IGrouping <string, IColumn> grouping in request.ColumnsToExtract.GroupBy(c => c.GetRuntimeName()).Where(g => g.Count() > 1))
            {
                notifier.OnCheckPerformed(new CheckEventArgs("There are " + grouping.Count() + " columns in the extract called '" + grouping.Key + "'", CheckResult.Fail));
            }

            //Make sure cohort and dataset are on same server before checking (can still get around this at runtime by using ExecuteCrossServerDatasetExtractionSource)
            if (!cohortServer.Server.Name.Equals(server.Name, StringComparison.CurrentCultureIgnoreCase) || !cohortServer.Server.DatabaseType.Equals(server.DatabaseType))
            {
                notifier.OnCheckPerformed(new CheckEventArgs(
                                              string.Format("Cohort is on server '{0}' ({1}) but dataset is on '{2}' ({3})",
                                                            cohortServer.Server.Name,
                                                            cohortServer.Server.DatabaseType,
                                                            server.Name, server.DatabaseType), CheckResult.Warning));
            }
            else
            {
                //Try to fetch TOP 1 data
                try
                {
                    //run inside a transaction that is abandoned afterwards, in case the user somehow manages
                    //to write a filter/transform that nukes data or something
                    using (var con = server.BeginNewTransactedConnection())
                    {
                        DbCommand cmd;

                        try
                        {
                            cmd = server.GetCommand(request.QueryBuilder.SQL, con);
                            cmd.CommandTimeout = timeout;
                            notifier.OnCheckPerformed(
                                new CheckEventArgs(
                                    "/*About to send Request SQL :*/" + Environment.NewLine + request.QueryBuilder.SQL,
                                    CheckResult.Success));
                        }
                        catch (QueryBuildingException e)
                        {
                            notifier.OnCheckPerformed(new CheckEventArgs("Failed to assemble query for dataset " + ds,
                                                                         CheckResult.Fail, e));
                            return;
                        }

                        //dispose the command once the test read is finished (it was previously never disposed)
                        using (cmd)
                        {
                            try
                            {
                                using (var r = cmd.ExecuteReader())
                                {
                                    if (r.Read())
                                    {
                                        notifier.OnCheckPerformed(new CheckEventArgs("Read at least 1 row successfully from dataset " + ds,
                                                                                     CheckResult.Success));
                                    }
                                    else
                                    {
                                        notifier.OnCheckPerformed(new CheckEventArgs("Dataset " + ds + " is completely empty (when linked with the cohort). " +
                                                                                     "Extraction may fail if the Source does not allow empty extractions",
                                                                                     CheckResult.Warning));
                                    }
                                }
                            }
                            catch (Exception e)
                            {
                                //a timeout is only a warning, any other failure to execute is a failed check
                                if (server.GetQuerySyntaxHelper().IsTimeout(e))
                                {
                                    notifier.OnCheckPerformed(new CheckEventArgs("Failed to read rows after " + timeout + "s", CheckResult.Warning, e));
                                }
                                else
                                {
                                    notifier.OnCheckPerformed(new CheckEventArgs("Failed to execute the query (See below for query)", CheckResult.Fail, e));
                                }
                            }
                        }

                        con.ManagedTransaction.AbandonAndCloseConnection();
                    }
                }
                catch (Exception e)
                {
                    notifier.OnCheckPerformed(new CheckEventArgs("Failed to execute Top 1 on dataset " + ds, CheckResult.Fail, e));
                }
            }

            var cata         = ds.Catalogue;
            var fetchOptions = _checkGlobals ? FetchOptions.ExtractableGlobalsAndLocals : FetchOptions.ExtractableLocals;

            //check the supporting documents (including globals when _checkGlobals is set)
            foreach (var supportingDocument in cata.GetAllSupportingDocuments(fetchOptions))
            {
                new SupportingDocumentsFetcher(supportingDocument).Check(notifier);
            }

            //check the supporting SQL tables (including globals when _checkGlobals is set)
            foreach (SupportingSQLTable table in cata.GetAllSupportingSQLTablesForCatalogue(fetchOptions))
            {
                new SupportingSQLTableChecker(table).Check(notifier);
            }

            //optionally check that the extraction pipeline itself is usable with this request
            if (_alsoCheckPipeline != null)
            {
                var engine = new ExtractionPipelineUseCase(request.Project, request, _alsoCheckPipeline, DataLoadInfo.Empty)
                             .GetEngine(_alsoCheckPipeline, new FromCheckNotifierToDataLoadEventListener(notifier));
                engine.Check(notifier);
            }
        }
        /// <summary>
        /// Extracts the dataset with a filter whose WHERE SQL references the "SuperSecretThing" column while the
        /// project specific table is force joined into the query.  The extract is expected to contain only the
        /// header and the single row that matches the filter (Bob), without the filter column itself.
        /// </summary>
        public void Extract_ProjectSpecificCatalogue_FilterReference()
        {
            // turn the custom dataset into a project specific one for this project
            CustomExtractableDataSet.Project_ID = _project.ID;
            CustomExtractableDataSet.SaveToDatabase();

            var pipeline = SetupPipeline();
            pipeline.Name = "Extract_ProjectSpecificCatalogue_FilterReference Pipe";
            pipeline.SaveToDatabase();

            // give the selected dataset a root container holding one filter
            var container = new FilterContainer(DataExportRepository);
            _selectedDataSet.RootFilterContainer_ID = container.ID;
            _selectedDataSet.SaveToDatabase();

            var monkeyFilter = new DeployedExtractionFilter(DataExportRepository, "monkeys only", container)
            {
                WhereSQL = "SuperSecretThing = 'monkeys can all secretly fly'"
            };
            monkeyFilter.SaveToDatabase();
            container.AddChild(monkeyFilter);

            // start from a clean slate: delete every existing join
            foreach (JoinInfo staleJoin in CatalogueRepository.GetAllObjects <JoinInfo>())
            {
                staleJoin.DeleteInDatabase();
            }

            // describe how the dataset table joins to the project specific table
            var coreColumns     = _extractableDataSet.Catalogue.GetAllExtractionInformation(ExtractionCategory.Core);
            var datasetIdColumn = coreColumns.Single(c => c.IsExtractionIdentifier).ColumnInfo;

            var projectColumns = CustomCatalogue.GetAllExtractionInformation(ExtractionCategory.ProjectSpecific);
            var customIdColumn = projectColumns.Single(e => e.GetRuntimeName().Equals("PrivateID")).ColumnInfo;

            new JoinInfo(CatalogueRepository, datasetIdColumn, customIdColumn, ExtractionJoinType.Left, null);

            // no column is extracted from the project specific table, so force it into the query
            new SelectedDataSetsForcedJoin(DataExportRepository, _selectedDataSet, CustomTableInfo);

            // build a fresh request so that it reflects the setup above
            var bundle = new ExtractableDatasetBundle(_extractableDataSet);
            _request = new ExtractDatasetCommand(_configuration, bundle);

            // reset the test table to exactly two known rows
            var testTable = Database.ExpectTable("TestTable");
            testTable.Truncate();

            using (var bulkInsert = testTable.BeginBulkInsert())
            {
                var rows = new DataTable();

                foreach (var columnName in new[] { "PrivateID", "Name", "DateOfBirth" })
                {
                    rows.Columns.Add(columnName);
                }

                rows.Rows.Add("Priv_12345", "Bob", "2001-01-01");
                rows.Rows.Add("Priv_wtf11", "Frank", "2001-10-29");

                bulkInsert.Upload(rows);
            }

            ExtractionPipelineUseCase            useCase;
            IExecuteDatasetExtractionDestination results;

            Execute(out useCase, out results);

            var extractedFiles = results.DirectoryPopulated.GetFiles();
            var csvFile        = extractedFiles.Single(f => f.Name.Equals("TestTable.csv"));

            Assert.IsNotNull(csvFile);

            var csvLines = File.ReadAllLines(csvFile.FullName);

            // header, then Bob only: Frank's row does not match the filter
            Assert.AreEqual("ReleaseID,Name,DateOfBirth", csvLines[0]);
            Assert.AreEqual("Pub_54321,Bob,2001-01-01", csvLines[1]);
            Assert.AreEqual(2, csvLines.Length);
        }