/// <summary>
/// Inspects the <paramref name="extractDatasetCommand"/> to see whether it is a batch extraction that has
/// so far completed only part of its full execution. When it is, the batch end date is saved as the new
/// progress date and the return value indicates whether another batch should be run.
/// </summary>
/// <param name="extractDatasetCommand">Command whose batch state is inspected (and reset when more remains to fetch)</param>
/// <param name="listener">Notified with an information message when progress is saved</param>
/// <returns>True when progress was saved and there is more to fetch (i.e. run again), otherwise false</returns>
private bool IncrementProgressIfAny(ExtractDatasetCommand extractDatasetCommand, IDataLoadEventListener listener)
{
    var extractionProgress = extractDatasetCommand.SelectedDataSets.ExtractionProgressIfAny;

    // nothing to record unless progress tracking exists and this run produced a batch end date
    if (extractionProgress == null || extractDatasetCommand.BatchEnd == null)
    {
        return false;
    }

    // persist how far we have got
    extractionProgress.ProgressDate = extractDatasetCommand.BatchEnd.Value;
    extractionProgress.SaveToDatabase();

    listener.OnNotify(this, new NotifyEventArgs(ProgressEventType.Information, $"Saving batch extraction progress as {extractionProgress.ProgressDate}"));

    if (!extractionProgress.MoreToFetch())
    {
        return false;
    }

    // clear the query builder so it can be rebuilt for the new dates
    extractDatasetCommand.Reset();
    return true;
}
// Extracts the project specific (custom) dataset on its own and verifies the content of the
// resulting custTable99.csv file.
public void Extract_ProjectSpecificCatalogue_WholeDataset()
{
    // associate the custom dataset with this project
    CustomExtractableDataSet.Project_ID = _project.ID;
    CustomExtractableDataSet.SaveToDatabase();

    var pipeline = SetupPipeline();
    pipeline.Name = "Extract_ProjectSpecificCatalogue_WholeDataset Pipe";
    pipeline.SaveToDatabase();

    _configuration.AddDatasetToConfiguration(CustomExtractableDataSet);

    try
    {
        _request = new ExtractDatasetCommand(_configuration, new ExtractableDatasetBundle(CustomExtractableDataSet));

        ExtractionPipelineUseCase useCase;
        IExecuteDatasetExtractionDestination results;
        Execute(out useCase, out results);

        var extractedFile = results.DirectoryPopulated
            .GetFiles()
            .Single(f => f.Name.Equals("custTable99.csv"));
        Assert.IsNotNull(extractedFile);

        var fileLines = File.ReadAllLines(extractedFile.FullName);

        Assert.AreEqual("SuperSecretThing,ReleaseID", fileLines[0]);
        Assert.AreEqual("monkeys can all secretly fly,Pub_54321", fileLines[1]);
        Assert.AreEqual("the wizard of OZ was a man behind a machine,Pub_11ftw", fileLines[2]);
    }
    finally
    {
        // always take the dataset back out of the configuration
        _configuration.RemoveDatasetFromConfiguration(CustomExtractableDataSet);
    }
}
/// <summary>
/// Applies batch extraction date limits to <paramref name="queryBuilder"/> when the
/// <paramref name="request"/> has an extraction progress record. Sets <c>IsBatchResume</c>,
/// <c>BatchStart</c> and <c>BatchEnd</c> on the request, adds a WHERE line bounded by
/// @batchStart/@batchEnd and registers both parameters as global parameters.
/// Does nothing when there is no extraction progress.
/// </summary>
/// <param name="request">The extraction command being built; updated with the batch window</param>
/// <param name="queryBuilder">Builder that receives the WHERE line and the two parameters</param>
/// <param name="syntaxHelper">Used to produce the parameter declarations</param>
/// <exception cref="QueryBuildingException">
/// Thrown when neither a start date nor a progress date is set, or when the number of days per batch is zero or negative
/// </exception>
private void HandleBatching(ExtractDatasetCommand request, QueryBuilder queryBuilder, IQuerySyntaxHelper syntaxHelper)
{
    var batch = request.SelectedDataSets.ExtractionProgressIfAny;

    if (batch == null)
    {
        // there is no batching going on
        return;
    }

    // this is a batch resume if we have made some progress already
    request.IsBatchResume = batch.ProgressDate.HasValue;

    DateTime start = batch.ProgressDate
        ?? batch.StartDate
        ?? throw new QueryBuildingException($"It was not possible to build a batch extraction query for '{request}' because there is no {nameof(ExtractionProgress.StartDate)} or {nameof(ExtractionProgress.ProgressDate)} set on the {nameof(ExtractionProgress)}");

    if (batch.NumberOfDaysPerBatch <= 0)
    {
        throw new QueryBuildingException($"{nameof(ExtractionProgress.NumberOfDaysPerBatch)} was {batch.NumberOfDaysPerBatch} for '{request}'");
    }

    var ei = batch.ExtractionInformation;

    DateTime end = start.AddDays(batch.NumberOfDaysPerBatch);

    // Don't load into the future / past end of dataset.
    // The limit is resolved exactly once so that the value compared against is the value applied
    // (reading DateTime.Now twice could yield two different instants).
    DateTime latestAllowedEnd = batch.EndDate ?? DateTime.Now;

    if (end > latestAllowedEnd)
    {
        end = latestAllowedEnd;
    }

    request.BatchStart = start;
    request.BatchEnd = end;

    string line;

    if (!request.IsBatchResume)
    {
        // if it is a first batch, also pull the null dates
        line = $"(({ei.SelectSQL} >= @batchStart AND {ei.SelectSQL} < @batchEnd) OR {ei.SelectSQL} is null)";
    }
    else
    {
        // it is a subsequent batch
        line = $"({ei.SelectSQL} >= @batchStart AND {ei.SelectSQL} < @batchEnd)";
    }

    queryBuilder.AddCustomLine(line, QueryComponent.WHERE);

    var batchStartDeclaration = syntaxHelper.GetParameterDeclaration("@batchStart", new DatabaseTypeRequest(typeof(DateTime)));
    var batchStartParameter = new ConstantParameter(batchStartDeclaration, FormatDateAsParameterValue(start), null, syntaxHelper);
    queryBuilder.ParameterManager.AddGlobalParameter(batchStartParameter);

    var batchEndDeclaration = syntaxHelper.GetParameterDeclaration("@batchEnd", new DatabaseTypeRequest(typeof(DateTime)));
    var batchEndParameter = new ConstantParameter(batchEndDeclaration, FormatDateAsParameterValue(end), null, syntaxHelper);
    queryBuilder.ParameterManager.AddGlobalParameter(batchEndParameter);
}
/// <summary>
/// Rebuilds the extraction SQL for the current configuration/dataset and shows it in the
/// (read only) query editor. Any failure is passed to <c>CommonFunctionality.ScintillaGoRed</c>
/// together with the editor instead of being thrown.
/// </summary>
private void RegenerateCodeInQueryEditor()
{
    try
    {
        if (_extractionConfiguration.Cohort_ID == null)
        {
            throw new Exception("No cohort has been defined for this ExtractionConfiguration");
        }

        //We are generating what the extraction SQL will be like, that only requires the dataset so empty bundle is fine
        _request = new ExtractDatasetCommand(_extractionConfiguration, new ExtractableDatasetBundle(_extractableDataSet));
        _request.GenerateQueryBuilder();

        // Resolve the SQL BEFORE unlocking the editor: if building the SQL throws, the editor
        // must not be left writable.
        var sql = _request.QueryBuilder.SQL;

        QueryEditor.ReadOnly = false;
        try
        {
            QueryEditor.Text = sql;
        }
        finally
        {
            // always restore the read only state, even if assigning the text failed
            QueryEditor.ReadOnly = true;
        }

        CommonFunctionality.ScintillaGoRed(QueryEditor, false);
    }
    catch (Exception ex)
    {
        CommonFunctionality.ScintillaGoRed(QueryEditor, ex);
    }
}
/// <summary>
/// Returns whether to retry the extraction. This method may perform a wait operation
/// before returning true.
/// </summary>
/// <param name="extractDatasetCommand">Command whose extraction progress (if any) supplies the retry strategy</param>
/// <param name="listener">Passed through to the retry wait strategy</param>
/// <param name="totalFailureCount">Passed through to the retry wait strategy</param>
/// <param name="consecutiveFailureCount">Passed through to the retry wait strategy</param>
/// <returns>False when there is no extraction progress, otherwise the result of applying the retry wait strategy</returns>
private bool ShouldRetry(ExtractDatasetCommand extractDatasetCommand, IDataLoadEventListener listener, int totalFailureCount, int consecutiveFailureCount)
{
    var extractionProgress = extractDatasetCommand.SelectedDataSets.ExtractionProgressIfAny;

    // without a progress record there is no retry strategy to apply
    return extractionProgress != null
        && extractionProgress.ApplyRetryWaitStrategy(Token, listener, totalFailureCount, consecutiveFailureCount);
}
/// <summary>
/// Stores the supplied <paramref name="collection"/>, builds an extraction request for its
/// selected dataset and starts loading the graph.
/// </summary>
/// <param name="activator">Item activator registered with this control and used to set the aggregate</param>
/// <param name="collection">Must be an <c>ExtractionAggregateGraphObjectCollection</c> (it is cast directly)</param>
public void SetCollection(IActivateItems activator, IPersistableObjectCollection collection)
{
    _collection = (ExtractionAggregateGraphObjectCollection)collection;
    SetItemActivator(activator);

    var selectedDataSets = _collection.SelectedDataSets;
    var configuration = selectedDataSets.ExtractionConfiguration;
    var dataSet = selectedDataSets.ExtractableDataSet;

    Request = new ExtractDatasetCommand(configuration, new ExtractableDatasetBundle(dataSet));
    Request.GenerateQueryBuilder();

    SetAggregate(activator, _collection.Graph);
    LoadGraphAsync();
}
// Extracts from the table valued function catalogue with a global parameter of 1 and verifies
// that a single row with a release identifier in the first column comes back.
private void TestDataExportOfTvf()
{
    var configuration = new ExtractionConfiguration(DataExportRepository, _project);
    configuration.Cohort_ID = DataExportRepository.GetAllObjects<ExtractableCohort>().Single().ID;
    configuration.SaveToDatabase();

    var tvfDataSet = new ExtractableDataSet(DataExportRepository, _tvfCatalogue);
    var selectedDataSets = new SelectedDataSets(DataExportRepository, configuration, tvfDataSet, null);

    //make all columns part of the extraction
    foreach (ExtractionInformation extractionInformation in _tvfCatalogue.GetAllExtractionInformation(ExtractionCategory.Any))
    {
        configuration.AddColumnToExtraction(tvfDataSet, extractionInformation);
    }

    //the default value should be 10
    Assert.AreEqual("10", _tvfTableInfo.GetAllParameters().Single().Value);

    //configure an extraction specific global of 1 so that only 1 chi number is fetched (which will be in the cohort)
    var globalParameter = new GlobalExtractionFilterParameter(DataExportRepository, configuration, "DECLARE @numberOfRecords AS int;");
    globalParameter.Value = "1";
    globalParameter.SaveToDatabase();

    var command = new ExtractDatasetCommand(configuration, new ExtractableDatasetBundle(tvfDataSet));

    var extractionSource = new ExecuteDatasetExtractionSource();
    extractionSource.PreInitialize(command, new ThrowImmediatelyDataLoadEventListener());

    var chunk = extractionSource.GetChunk(new ThrowImmediatelyDataLoadEventListener(), new GracefulCancellationToken());

    Assert.AreEqual(1, chunk.Rows.Count);
    Assert.AreEqual("ReleaseId", chunk.Columns[0].ColumnName);

    //should be a guid
    Assert.IsTrue(chunk.Rows[0][0].ToString().Length > 10);
    Assert.IsTrue(chunk.Rows[0][0].ToString().Contains("-"));

    // tidy up the objects created above
    selectedDataSets.DeleteInDatabase();
    globalParameter.DeleteInDatabase();
    configuration.DeleteInDatabase();
    tvfDataSet.DeleteInDatabase();
}
// Prepares catalogue + data export objects, assigns the cohort to the configuration and
// builds the extraction request (_request) used by the tests.
protected override void SetUp()
{
    base.SetUp();

    SetupCatalogueConfigurationEtc();
    SetupDataExport();

    _configuration.Cohort_ID = _extractableCohort.ID;
    _configuration.SaveToDatabase();

    var bundle = new ExtractableDatasetBundle(_extractableDataSet);
    var salt = new HICProjectSalt(_project);
    var directory = new ExtractionDirectory(@"C:\temp\", _configuration);

    _request = new ExtractDatasetCommand(_configuration, _extractableCohort, bundle, _extractableColumns, salt, directory);
}
/// <summary>
/// Stores the <paramref name="request"/> and prepares the state used during extraction:
/// stopwatches, sorted columns, the query builder (generated if absent), release identifier
/// column names, the optional validator and the optional time coverage aggregator.
/// Only <c>Request</c> is set when <paramref name="request"/> is <c>ExtractDatasetCommand.EmptyCommand</c>.
/// </summary>
/// <param name="request">The extraction command to initialize from</param>
protected virtual void Initialize(ExtractDatasetCommand request)
{
    Request = request;

    if (request == ExtractDatasetCommand.EmptyCommand)
    {
        return;
    }

    _timeSpentValidating = new Stopwatch();
    _timeSpentCalculatingDISTINCT = new Stopwatch();
    _timeSpentBuckettingDates = new Stopwatch();

    //ensure they are in the right order so we can record the release identifiers
    Request.ColumnsToExtract.Sort();

    // only build the query when the request does not already carry a builder
    if (request.QueryBuilder == null)
    {
        request.GenerateQueryBuilder();
    }

    // remember the runtime name of each release identifier substitution
    foreach (ReleaseIdentifierSubstitution releaseIdentifier in Request.ReleaseIdentifierSubstitutions)
    {
        _extractionIdentifiersidx.Add(releaseIdentifier.GetRuntimeName());
    }

    UniqueReleaseIdentifiersEncountered = new HashSet<object>();

    _catalogue = request.Catalogue;

    if (!string.IsNullOrWhiteSpace(_catalogue.ValidatorXML))
    {
        ExtractionTimeValidator = new ExtractionTimeValidator(_catalogue, request.ColumnsToExtract);
    }

    //if there is a time periodicity ExtractionInformation (AND! it is among the columns the user selected to be extracted)
    bool timeCoverageColumnIsExtracted =
        _catalogue.TimeCoverage_ExtractionInformation_ID != null
        && request.ColumnsToExtract
            .Cast<ExtractableColumn>()
            .Any(c => c.CatalogueExtractionInformation_ID == _catalogue.TimeCoverage_ExtractionInformation_ID);

    if (timeCoverageColumnIsExtracted)
    {
        ExtractionTimeTimeCoverageAggregator = new ExtractionTimeTimeCoverageAggregator(_catalogue, request.ExtractableCohort);
    }
    else
    {
        ExtractionTimeTimeCoverageAggregator = null;
    }
}
// Prepares catalogue + data export objects once, assigns the cohort to the configuration and
// builds the extraction request (_request) targeting a "TestProject" folder under the test work directory.
protected override void OneTimeSetUp()
{
    base.OneTimeSetUp();

    ProjectDirectory = Path.Combine(TestContext.CurrentContext.WorkDirectory, "TestProject");

    SetupCatalogueConfigurationEtc();
    SetupDataExport();

    _configuration.Cohort_ID = _extractableCohort.ID;
    _configuration.SaveToDatabase();

    var bundle = new ExtractableDatasetBundle(_extractableDataSet);
    var salt = new HICProjectSalt(_project);
    var directory = new ExtractionDirectory(ProjectDirectory, _configuration);

    _request = new ExtractDatasetCommand(_configuration, _extractableCohort, bundle, _extractableColumns, salt, directory);
}
// Verifies that a valid subdirectory pattern passes Check and that the directory chosen for the
// command matches the expected regex (after normalising backslashes to forward slashes).
public void Test_ValidPaths(string goodString, string pattern)
{
    var selectedDataSets = WhenIHaveA<SelectedDataSets>();

    selectedDataSets.ExtractionConfiguration.Project.ExtractionDirectory = TestContext.CurrentContext.WorkDirectory;
    selectedDataSets.ExtractionConfiguration.Name = "AAA";
    selectedDataSets.ExtractableDataSet.Catalogue.Name = "BBB";
    selectedDataSets.ExtractableDataSet.Catalogue.Acronym = "C";

    var command = new ExtractDatasetCommand(
        selectedDataSets.ExtractionConfiguration,
        new ExtractableDatasetBundle(selectedDataSets.ExtractableDataSet));

    var destination = new ExecuteDatasetExtractionFlatFileDestination();
    destination.ExtractionSubdirectoryPattern = goodString;

    Assert.DoesNotThrow(() => destination.Check(new ThrowImmediatelyCheckNotifier()));

    var directory = destination.GetDirectoryFor(command);
    var normalisedPath = directory.FullName.Replace('\\', '/');

    StringAssert.IsMatch(pattern, normalisedPath);
}
/// <summary>
/// Builds the extraction SQL as it would exist today for <c>Configuration</c> and <c>DataSet</c>.
/// Also refreshes <c>_columnsToExtract</c> with the (sorted) currently configured columns.
/// </summary>
/// <returns>The SQL produced by the freshly generated query builder</returns>
private string GetCurrentConfigurationSQL()
{
    //get the cohort
    var cohort = _repository.GetObjectByID<ExtractableCohort>((int)Configuration.Cohort_ID);

    //get the columns that are configured today
    _columnsToExtract = new List<IColumn>(Configuration.GetAllExtractableColumnsFor(DataSet));
    _columnsToExtract.Sort();

    //get the salt
    var project = _repository.GetObjectByID<Project>(Configuration.Project_ID);
    var salt = new HICProjectSalt(project);

    //create a request for an empty bundle - only the dataset
    var bundle = new ExtractableDatasetBundle(DataSet);
    var request = new ExtractDatasetCommand(Configuration, cohort, bundle, _columnsToExtract, salt, null);
    request.GenerateQueryBuilder();

    //the SQL as it would exist today for this extraction
    return request.QueryBuilder.SQL;
}
// Adds a transformed YearOfBirth column to the extraction, runs the extraction into a database
// and verifies the destination table content and column types.
public void SQLServerDestination()
{
    DiscoveredDatabase dbToExtractTo = null;

    var catalogueItem = new CatalogueItem(CatalogueRepository, _catalogue, "YearOfBirth");
    var columnToTransform = _columnInfos.Single(c => c.GetRuntimeName().Equals("DateOfBirth", StringComparison.CurrentCultureIgnoreCase));

    string transform = "YEAR(" + columnToTransform.Name + ")";

    var extractionInformation = new ExtractionInformation(CatalogueRepository, catalogueItem, columnToTransform, transform);
    extractionInformation.Alias = "YearOfBirth";
    extractionInformation.ExtractionCategory = ExtractionCategory.Core;
    extractionInformation.SaveToDatabase();

    //make it part of the ExtractionConfiguration
    var yearOfBirthColumn = new ExtractableColumn(
        DataExportRepository,
        _selectedDataSet.ExtractableDataSet,
        (ExtractionConfiguration)_selectedDataSet.ExtractionConfiguration,
        extractionInformation,
        0,
        extractionInformation.SelectSQL);
    yearOfBirthColumn.Alias = extractionInformation.Alias;
    yearOfBirthColumn.SaveToDatabase();

    _extractableColumns.Add(yearOfBirthColumn);

    //recreate the extraction command so it gets updated with the new column too.
    _request = new ExtractDatasetCommand(
        _configuration,
        _extractableCohort,
        new ExtractableDatasetBundle(_extractableDataSet),
        _extractableColumns,
        new HICProjectSalt(_project),
        new ExtractionDirectory(@"C:\temp\", _configuration));

    try
    {
        _configuration.Name = "ExecuteFullExtractionToDatabaseMSSqlDestinationTest";
        _configuration.SaveToDatabase();

        ExtractionPipelineUseCase execute;
        IExecuteDatasetExtractionDestination result;

        var databaseName = TestDatabaseNames.GetConsistentName(_project.Name + "_" + _project.ProjectNumber);
        dbToExtractTo = DiscoveredServerICanCreateRandomDatabasesAndTablesOn.ExpectDatabase(databaseName);

        // start from a clean slate
        if (dbToExtractTo.Exists())
        {
            dbToExtractTo.Drop();
        }

        base.Execute(out execute, out result);

        var destinationTable = dbToExtractTo.ExpectTable(_expectedTableName);
        Assert.IsTrue(destinationTable.Exists());

        var extracted = destinationTable.GetDataTable();

        Assert.AreEqual(1, extracted.Rows.Count);
        Assert.AreEqual(_cohortKeysGenerated[_cohortKeysGenerated.Keys.First()].Trim(), extracted.Rows[0]["ReleaseID"]);
        Assert.AreEqual(new DateTime(2001, 1, 1), extracted.Rows[0]["DateOfBirth"]);
        Assert.AreEqual(2001, extracted.Rows[0]["YearOfBirth"]);

        Assert.AreEqual(columnToTransform.Data_type, destinationTable.DiscoverColumn("DateOfBirth").DataType.SQLType);
        Assert.AreEqual("int", destinationTable.DiscoverColumn("YearOfBirth").DataType.SQLType);
    }
    finally
    {
        if (_extractionServer != null)
        {
            _extractionServer.DeleteInDatabase();
        }

        if (dbToExtractTo != null)
        {
            dbToExtractTo.Drop();
        }
    }
}
// Deploys a parameterised filter onto the selected dataset, checks the generated extraction SQL and
// then verifies that a deep clone of the configuration generates exactly the same SQL.
public void CloneWithFilters(bool introduceOrphanExtractionInformation)
{
    if (introduceOrphanExtractionInformation)
    {
        IntroduceOrphan();
    }

    Assert.IsEmpty(_configuration.ReleaseLog);

    var filter = new ExtractionFilter(CatalogueRepository, "FilterByFish", _extractionInformations[0]);

    try
    {
        //setup a filter with a parameter
        filter.WhereSQL = "Fish = @fish";

        new ParameterCreator(new ExtractionFilterFactory(_extractionInformations[0]), null, null).CreateAll(filter, null);
        filter.SaveToDatabase();

        Assert.IsTrue(filter.ExtractionFilterParameters.Count() == 1);

        //create a root container
        var rootContainer = new FilterContainer(DataExportRepository);
        _selectedDataSet.RootFilterContainer_ID = rootContainer.ID;
        _selectedDataSet.SaveToDatabase();

        //create a deployed filter
        var filterImporter = new FilterImporter(new DeployedExtractionFilterFactory(DataExportRepository), null);
        var deployedFilter = (DeployedExtractionFilter)filterImporter.ImportFilter(filter, null);
        deployedFilter.FilterContainer_ID = rootContainer.ID;
        deployedFilter.Name = "FilterByFishDeployed";
        deployedFilter.SaveToDatabase();

        var deployedParameter = deployedFilter.ExtractionFilterParameters[0];
        deployedParameter.Value = "'jormungander'";
        deployedParameter.SaveToDatabase();

        ExtractDatasetCommand request = new ExtractDatasetCommand(_configuration, new ExtractableDatasetBundle(_extractableDataSet));
        request.GenerateQueryBuilder();

        // whitespace is collapsed on both sides before comparing
        Assert.AreEqual(
            CollapseWhitespace(
                string.Format(
@"DECLARE @fish AS varchar(50); SET @fish='jormungander'; /*The ID of the cohort in [{0}CohortDatabase]..[Cohort]*/ DECLARE @CohortDefinitionID AS int; SET @CohortDefinitionID=-599; /*The project number of project {0}ExtractionConfiguration*/ DECLARE @ProjectNumber AS int; SET @ProjectNumber=1; SELECT DISTINCT [{0}CohortDatabase]..[Cohort].[ReleaseID] AS ReleaseID, [{0}ScratchArea].[dbo].[TestTable].[Name], [{0}ScratchArea].[dbo].[TestTable].[DateOfBirth] FROM [{0}ScratchArea].[dbo].[TestTable] INNER JOIN [{0}CohortDatabase]..[Cohort] ON 
[{0}ScratchArea].[dbo].[TestTable].[PrivateID]=[{0}CohortDatabase]..[Cohort].[PrivateID] WHERE ( /*FilterByFishDeployed*/ Fish = @fish ) AND [{0}CohortDatabase]..[Cohort].[cohortDefinition_id]=-599 "
                    , TestDatabaseNames.Prefix))
            , CollapseWhitespace(request.QueryBuilder.SQL));

        ExtractionConfiguration deepClone = _configuration.DeepCloneWithNewIDs();

        Assert.AreEqual(deepClone.Cohort_ID, _configuration.Cohort_ID);
        Assert.AreNotEqual(deepClone.ID, _configuration.ID);

        try
        {
            ExtractDatasetCommand cloneRequest = new ExtractDatasetCommand(deepClone, new ExtractableDatasetBundle(_extractableDataSet));
            cloneRequest.GenerateQueryBuilder();

            Assert.AreEqual(request.QueryBuilder.SQL, cloneRequest.QueryBuilder.SQL);
        }
        finally
        {
            deepClone.DeleteInDatabase();
        }
    }
    finally
    {
        filter.DeleteInDatabase();
    }
}
/// <summary>
/// Checks the <see cref="SelectedDataSet"/> and reports success/failures to the <paramref name="notifier"/>.
/// The checks cover: selected columns exist, the catalogue can be found and is not internal, extraction SQL
/// can be generated, the dataset and cohort servers exist, column names/orders are unique, columns are in
/// sync with the catalogue, a TOP 1 query can be run (only when cohort and dataset share a server),
/// supporting documents / SQL tables and, optionally, the extraction pipeline.
/// </summary>
/// <param name="notifier">Receives the outcome of every check performed</param>
public void Check(ICheckNotifier notifier)
{
    var ds = SelectedDataSet.ExtractableDataSet;
    var config = SelectedDataSet.ExtractionConfiguration;
    var cohort = config.Cohort;
    var project = config.Project;

    // command timeout applied to the TOP 1 query
    const int timeout = 5;

    notifier.OnCheckPerformed(new CheckEventArgs("Inspecting dataset " + ds, CheckResult.Success));

    var selectedcols = new List<IColumn>(config.GetAllExtractableColumnsFor(ds));

    if (!selectedcols.Any())
    {
        notifier.OnCheckPerformed(
            new CheckEventArgs(
                "Dataset " + ds + " in configuration '" + config + "' has no selected columns",
                CheckResult.Fail));

        return;
    }

    ICatalogue cata;
    try
    {
        cata = ds.Catalogue;
    }
    catch (Exception e)
    {
        notifier.OnCheckPerformed(new CheckEventArgs("Unable to find Catalogue for ExtractableDataSet", CheckResult.Fail, e));
        return;
    }

    // reported as a failure but checking continues
    if (cata.IsInternalDataset)
    {
        notifier.OnCheckPerformed(new CheckEventArgs($"Dataset '{ds}' is marked {nameof(ICatalogue.IsInternalDataset)} so should not be extracted", CheckResult.Fail));
    }

    // TopX = 1 because we only want to prove the query runs, not fetch the data
    var request = new ExtractDatasetCommand(config, cohort, new ExtractableDatasetBundle(ds),
        selectedcols, new HICProjectSalt(project),
        new ExtractionDirectory(project.ExtractionDirectory, config))
    {
        TopX = 1
    };

    try
    {
        request.GenerateQueryBuilder();
    }
    catch (Exception e)
    {
        notifier.OnCheckPerformed(
            new CheckEventArgs(
                "Could not generate valid extraction SQL for dataset " + ds +
                " in configuration " + config, CheckResult.Fail, e));
        return;
    }

    var server = request.GetDistinctLiveDatabaseServer();
    bool serverExists = server.Exists();

    notifier.OnCheckPerformed(new CheckEventArgs("Server " + server + " Exists:" + serverExists,
        serverExists ? CheckResult.Success : CheckResult.Fail));

    var cohortServer = request.ExtractableCohort.ExternalCohortTable.Discover();

    if (cohortServer == null || !cohortServer.Exists())
    {
        notifier.OnCheckPerformed(new CheckEventArgs("Cohort server did not exist or was unreachable", CheckResult.Fail));
        return;
    }

    //when 2+ columns have the same Name it's a problem
    foreach (IGrouping<string, IColumn> grouping in request.ColumnsToExtract.GroupBy(c => c.GetRuntimeName()).Where(g => g.Count() > 1))
    {
        notifier.OnCheckPerformed(new CheckEventArgs($"There are { grouping.Count() } columns in the extract ({request.DatasetBundle?.DataSet}) called '{ grouping.Key }'", CheckResult.Fail));
    }

    //when 2+ columns have the same Order it's reported as a failure
    // NOTE(review): presumably because the output column ordering becomes ambiguous - confirm
    foreach (IGrouping<int, IColumn> grouping in request.ColumnsToExtract.GroupBy(c => c.Order).Where(g => g.Count() > 1))
    {
        notifier.OnCheckPerformed(new CheckEventArgs($"There are { grouping.Count() } columns in the extract ({request.DatasetBundle?.DataSet}) that share the same Order '{ grouping.Key }'", CheckResult.Fail));
    }

    // Warn user if stuff is out of sync with the Catalogue version (changes have happened to the master but not propagated to the copy in this extraction)
    var outOfSync = selectedcols.OfType<ExtractableColumn>().Where(c => c.IsOutOfSync()).ToArray();
    if (outOfSync.Any())
    {
        notifier.OnCheckPerformed(new CheckEventArgs($"'{ds}' columns out of sync with CatalogueItem version(s): { Environment.NewLine + string.Join(',', outOfSync.Select(o => o.ToString() + Environment.NewLine)) }" +
            $"{ Environment.NewLine } Extraction Configuration: '{config}' ", CheckResult.Warning));
    }

    // Core / ProjectSpecific columns (other than extraction identifiers) that are not among the selected columns
    var nonSelectedCore = cata.GetAllExtractionInformation(ExtractionCategory.Core)
        .Union(cata.GetAllExtractionInformation(ExtractionCategory.ProjectSpecific))
        .Where(ei => !ei.IsExtractionIdentifier &&
                     !selectedcols.OfType<ExtractableColumn>().Any(ec => ec.CatalogueExtractionInformation_ID == ei.ID))
        .ToArray();

    if (nonSelectedCore.Any())
    {
        notifier.OnCheckPerformed(new CheckEventArgs($"'{ds}' Core columns not selected for extractions: { Environment.NewLine + string.Join(',', nonSelectedCore.Select(o => o.ToString() + Environment.NewLine)) }" +
            $"{ Environment.NewLine } Extraction Configuration: '{config}' ", CheckResult.Warning));
    }

    //Make sure cohort and dataset are on same server before checking (can still get around this at runtime by using ExecuteCrossServerDatasetExtractionSource)
    if (!cohortServer.Server.Name.Equals(server.Name, StringComparison.CurrentCultureIgnoreCase) || !cohortServer.Server.DatabaseType.Equals(server.DatabaseType))
    {
        notifier.OnCheckPerformed(new CheckEventArgs(
            $"Cohort is on server '{cohortServer.Server.Name}' ({cohortServer.Server.DatabaseType}) but dataset '{request.DatasetBundle?.DataSet}' is on '{server.Name}' ({server.DatabaseType})"
            , CheckResult.Warning));
    }
    else
    {
        //Try to fetch TOP 1 data
        try
        {
            //transacted (and abandoned below) incase user somehow manages to write a filter/transform that nukes data or something
            using (var con = server.BeginNewTransactedConnection())
            {
                DbCommand cmd;

                try
                {
                    cmd = server.GetCommand(request.QueryBuilder.SQL, con);
                    cmd.CommandTimeout = timeout;
                    notifier.OnCheckPerformed(
                        new CheckEventArgs(
                            "/*About to send Request SQL :*/" + Environment.NewLine + request.QueryBuilder.SQL,
                            CheckResult.Success));
                }
                catch (QueryBuildingException e)
                {
                    notifier.OnCheckPerformed(new CheckEventArgs("Failed to assemble query for dataset " + ds,
                        CheckResult.Fail, e));
                    return;
                }

                // dispose the command once the query has been attempted (it was previously never disposed)
                using (cmd)
                {
                    try
                    {
                        using (var r = cmd.ExecuteReader())
                        {
                            if (r.Read())
                            {
                                notifier.OnCheckPerformed(new CheckEventArgs("Read at least 1 row successfully from dataset " + ds,
                                    CheckResult.Success));
                            }
                            else
                            {
                                notifier.OnCheckPerformed(new CheckEventArgs("Dataset " + ds + " is completely empty (when linked with the cohort). " +
                                    "Extraction may fail if the Source does not allow empty extractions",
                                    CheckResult.Warning));
                            }
                        }
                    }
                    catch (Exception e)
                    {
                        // timeouts get their own error code, everything else is a plain failure
                        if (server.GetQuerySyntaxHelper().IsTimeout(e))
                        {
                            notifier.OnCheckPerformed(new CheckEventArgs(ErrorCodes.ExtractTimeoutChecking, e, timeout));
                        }
                        else
                        {
                            notifier.OnCheckPerformed(new CheckEventArgs("Failed to execute the query (See below for query)", CheckResult.Fail, e));
                        }
                    }
                }

                con.ManagedTransaction.AbandonAndCloseConnection();
            }
        }
        catch (Exception e)
        {
            notifier.OnCheckPerformed(new CheckEventArgs("Failed to execute Top 1 on dataset " + ds, CheckResult.Fail, e));
        }
    }

    var fetchOptions = _checkGlobals ? FetchOptions.ExtractableGlobalsAndLocals : FetchOptions.ExtractableLocals;

    foreach (var supportingDocument in cata.GetAllSupportingDocuments(fetchOptions))
    {
        new SupportingDocumentsFetcher(supportingDocument).Check(notifier);
    }

    //check catalogue locals
    foreach (SupportingSQLTable table in cata.GetAllSupportingSQLTablesForCatalogue(fetchOptions))
    {
        new SupportingSQLTableChecker(table).Check(notifier);
    }

    // optionally also check the pipeline that would run this extraction
    if (_alsoCheckPipeline != null)
    {
        var engine = new ExtractionPipelineUseCase(_activator, request.Project, request, _alsoCheckPipeline, DataLoadInfo.Empty)
            .GetEngine(_alsoCheckPipeline, new FromCheckNotifierToDataLoadEventListener(notifier));
        engine.Check(notifier);
    }
}
/// <summary>
/// This produces the SQL that would retrieve the specified dataset columns including any JOINS
///
/// <para>It uses:
/// QueryBuilder and then it adds some custom lines for linking to the cohort</para>
///
/// <para>Side effects visible here: extraction identifier columns are removed from
/// <c>request.ColumnsToExtract</c> (they are replaced by release identifier substitutions) and the
/// resulting builder is stored in <c>request.QueryBuilder</c>.</para>
/// </summary>
/// <param name="request">The extraction command to build SQL for; must not already have a QueryBuilder</param>
/// <param name="substitutions">Receives one release identifier substitution per extraction identifier column</param>
/// <returns>The configured query builder (also cached on the request)</returns>
/// <exception cref="Exception">The request already has a QueryBuilder, has no columns, or has no extraction identifier column</exception>
/// <exception cref="NullReferenceException">The request has no cohort</exception>
/// <exception cref="NotSupportedException">The catalogue does not report a database type</exception>
public QueryBuilder GetSQLCommandForFullExtractionSet(ExtractDatasetCommand request, out List<ReleaseIdentifierSubstitution> substitutions)
{
    if (request.QueryBuilder != null)
    {
        throw new Exception("Creation of a QueryBuilder from a request can only happen once, to access the results of the creation use the cached answer in the request.QueryBuilder property");
    }

    if (!request.ColumnsToExtract.Any())
    {
        throw new Exception("No columns are marked for extraction in this configuration");
    }

    // exception type kept as-is so existing callers observe the same failure
    if (request.ExtractableCohort == null)
    {
        throw new NullReferenceException("No Cohort selected");
    }

    var databaseType = request.Catalogue.GetDistinctLiveDatabaseServerType();

    if (databaseType == null)
    {
        throw new NotSupportedException("Catalogue " + request.Catalogue + " did not know what DatabaseType it hosted, how can we extract from it! does it have no TableInfos?");
    }

    var syntaxHelper = new QuerySyntaxHelperFactory().Create(databaseType.Value);

    substitutions = new List<ReleaseIdentifierSubstitution>();

    var memoryRepository = new MemoryRepository();

    // resolve the extraction identifier columns once rather than re-filtering the list for count and content
    var extractionIdentifiers = request.ColumnsToExtract.Where(c => c.IsExtractionIdentifier).ToArray();

    if (extractionIdentifiers.Length == 0)
    {
        throw new Exception("There are no Columns in this dataset (" + request + ") marked as IsExtractionIdentifier");
    }

    // single identifier e.g. CHI X died on date Y with conditions a,b and c
    // multiple identifiers e.g. Mother X had Babies A, B, C where A,B and C are all CHIs that must be subbed for ProCHIs
    bool isPartOfMultipleIdentifiers = extractionIdentifiers.Length > 1;

    foreach (var identifierColumn in extractionIdentifiers)
    {
        substitutions.Add(new ReleaseIdentifierSubstitution(memoryRepository, identifierColumn, request.ExtractableCohort, isPartOfMultipleIdentifiers, syntaxHelper));
    }

    string hashingAlgorithm = _repository.DataExportPropertyManager.GetValue(DataExportProperty.HashingAlgorithmPattern);
    if (string.IsNullOrWhiteSpace(hashingAlgorithm))
    {
        hashingAlgorithm = null;
    }

    //identify any tables we are supposed to force join to
    var forcedJoins = request.SelectedDataSets.SelectedDataSetsForcedJoins;

    QueryBuilder queryBuilder = new QueryBuilder("DISTINCT ", hashingAlgorithm, forcedJoins.Select(s => s.TableInfo).ToArray());
    queryBuilder.TopX = request.TopX;
    queryBuilder.SetSalt(request.Salt.GetSalt());

    //add the constant parameters
    foreach (ConstantParameter parameter in GetConstantParameters(syntaxHelper, request.Configuration, request.ExtractableCohort))
    {
        queryBuilder.ParameterManager.AddGlobalParameter(parameter);
    }

    //add the global parameters
    foreach (var globalExtractionFilterParameter in request.Configuration.GlobalExtractionFilterParameters)
    {
        queryBuilder.ParameterManager.AddGlobalParameter(globalExtractionFilterParameter);
    }

    //remove the identification column from the query
    request.ColumnsToExtract.RemoveAll(c => c.IsExtractionIdentifier);

    //add in the ReleaseIdentifier in place of the identification column
    queryBuilder.AddColumnRange(substitutions.ToArray());

    //add the rest of the columns to the query
    queryBuilder.AddColumnRange(request.ColumnsToExtract.Cast<IColumn>().ToArray());

    //add the users selected filters
    queryBuilder.RootFilterContainer = request.Configuration.GetFilterContainerFor(request.DatasetBundle.DataSet);

    // the cohort is known to be non-null here (guarded above), so the join is always added
    ExternalCohortTable externalCohortTable = _repository.GetObjectByID<ExternalCohortTable>(request.ExtractableCohort.ExternalCohortTable_ID);

    //the JOIN with the cohort table: one ON condition per substitution, OR'd together when there are several
    string cohortJoin = " INNER JOIN " + externalCohortTable.TableName + " ON " + string.Join(" OR ", substitutions.Select(s => s.JoinSQL));

    //add the JOIN in after any other joins
    queryBuilder.AddCustomLine(cohortJoin, QueryComponent.JoinInfoJoin);

    //add the filter cohortID because our new Cohort system uses ID number and a giant combo table with all the cohorts in it we need to say Select XX from XX join Cohort Where Cohort number = Y
    queryBuilder.AddCustomLine(request.ExtractableCohort.WhereSQL(), QueryComponent.WHERE);

    request.QueryBuilder = queryBuilder;
    return queryBuilder;
}
// Appends a project specific column (SuperSecretThing) to the main dataset extraction via a
// join between the two tables and verifies the content of the extracted TestTable.csv.
// The extractable column created for the test is removed in a finally block so that a failing
// assertion does not leave it behind.
public void Extract_ProjectSpecificCatalogue_AppendedColumn()
{
    //make the catalogue a custom catalogue for this project
    CustomExtractableDataSet.Project_ID = _project.ID;
    CustomExtractableDataSet.SaveToDatabase();

    var pipe = SetupPipeline();
    pipe.Name = "Extract_ProjectSpecificCatalogue_AppendedColumn Pipe";
    pipe.SaveToDatabase();

    var extraColumn = CustomCatalogue.GetAllExtractionInformation(ExtractionCategory.ProjectSpecific).Single(e => e.GetRuntimeName().Equals("SuperSecretThing"));
    var asExtractable = new ExtractableColumn(DataExportRepository, _extractableDataSet, _configuration, extraColumn, 10, extraColumn.SelectSQL);

    try
    {
        //get rid of any lingering joins
        foreach (JoinInfo j in CatalogueRepository.GetAllObjects<JoinInfo>())
        {
            j.DeleteInDatabase();
        }

        //add the ability to join the two tables in the query
        var idCol = _extractableDataSet.Catalogue.GetAllExtractionInformation(ExtractionCategory.Core).Single(c => c.IsExtractionIdentifier).ColumnInfo;
        var otherIdCol = CustomCatalogue.GetAllExtractionInformation(ExtractionCategory.ProjectSpecific).Single(e => e.GetRuntimeName().Equals("PrivateID")).ColumnInfo;
        new JoinInfo(CatalogueRepository, idCol, otherIdCol, ExtractionJoinType.Left, null);

        //generate a new request (this will include the newly created column)
        _request = new ExtractDatasetCommand(_configuration, new ExtractableDatasetBundle(_extractableDataSet));

        // replace the content of TestTable with two known rows
        var tbl = Database.ExpectTable("TestTable");
        tbl.Truncate();

        using (var blk = tbl.BeginBulkInsert())
        using (var dt = new DataTable())
        {
            dt.Columns.Add("PrivateID");
            dt.Columns.Add("Name");
            dt.Columns.Add("DateOfBirth");

            dt.Rows.Add(new object[] { "Priv_12345", "Bob", "2001-01-01" });
            dt.Rows.Add(new object[] { "Priv_wtf11", "Frank", "2001-10-29" });

            blk.Upload(dt);
        }

        ExtractionPipelineUseCase useCase;
        IExecuteDatasetExtractionDestination results;
        Execute(out useCase, out results);

        var mainDataTableCsv = results.DirectoryPopulated.GetFiles().Single(f => f.Name.Equals("TestTable.csv"));

        Assert.IsNotNull(mainDataTableCsv);
        Assert.AreEqual("TestTable.csv", mainDataTableCsv.Name);

        var lines = File.ReadAllLines(mainDataTableCsv.FullName);

        Assert.AreEqual("ReleaseID,Name,DateOfBirth,SuperSecretThing", lines[0]);

        // row order is not asserted, each expected row is located by its prefix
        var bobLine = lines.Single(l => l.StartsWith("Pub_54321,Bob"));
        var frankLine = lines.Single(l => l.StartsWith("Pub_11ftw,Frank"));

        Assert.AreEqual("Pub_54321,Bob,2001-01-01,monkeys can all secretly fly", bobLine);
        Assert.AreEqual("Pub_11ftw,Frank,2001-10-29,the wizard of OZ was a man behind a machine", frankLine);
    }
    finally
    {
        // remove the appended column even when the test fails
        asExtractable.DeleteInDatabase();
    }
}
/// <summary>
/// Checks the <see cref="SelectedDataSet"/> and reports success/failures to the <paramref name="notifier"/>.
/// <para>In order, this verifies that: the dataset has selected columns, extraction SQL can be generated, the
/// dataset server and the cohort server exist, no two extracted columns share a runtime name and (only when
/// cohort and dataset are on the same server) that a TOP 1 query can be executed. Afterwards the supporting
/// documents, the supporting SQL tables and, if one was supplied, the extraction pipeline are checked.</para>
/// </summary>
/// <param name="notifier">Receives the outcome of every check that is performed</param>
public void Check(ICheckNotifier notifier)
{
    var ds = SelectedDataSet.ExtractableDataSet;
    var config = SelectedDataSet.ExtractionConfiguration;
    var cohort = config.Cohort;
    var project = config.Project;

    //command timeout applied to the TOP 1 test query (reported to the user in seconds)
    const int timeout = 5;

    notifier.OnCheckPerformed(new CheckEventArgs("Inspecting dataset " + ds, CheckResult.Success));

    var selectedcols = new List<IColumn>(config.GetAllExtractableColumnsFor(ds));

    //nothing can be extracted without columns so give up immediately
    if (!selectedcols.Any())
    {
        notifier.OnCheckPerformed(
            new CheckEventArgs(
                "Dataset " + ds + " in configuration '" + config + "' has no selected columns",
                CheckResult.Fail));

        return;
    }

    //TopX = 1 so that the test query only has to fetch a single row
    var request = new ExtractDatasetCommand(config, cohort, new ExtractableDatasetBundle(ds),
        selectedcols, new HICProjectSalt(project),
        new ExtractionDirectory(project.ExtractionDirectory, config))
    {
        TopX = 1
    };

    try
    {
        request.GenerateQueryBuilder();
    }
    catch (Exception e)
    {
        notifier.OnCheckPerformed(
            new CheckEventArgs(
                "Could not generate valid extraction SQL for dataset " + ds +
                " in configuration " + config, CheckResult.Fail, e));
        return;
    }

    var server = request.GetDistinctLiveDatabaseServer();
    bool serverExists = server.Exists();

    notifier.OnCheckPerformed(new CheckEventArgs("Server " + server + " Exists:" + serverExists,
        serverExists ? CheckResult.Success : CheckResult.Fail));

    var cohortServer = request.ExtractableCohort.ExternalCohortTable.Discover();

    if (cohortServer == null || !cohortServer.Exists())
    {
        notifier.OnCheckPerformed(new CheckEventArgs("Cohort server did not exist or was unreachable", CheckResult.Fail));
        return;
    }

    //report every runtime name that appears more than once amongst the columns to extract
    foreach (IGrouping<string, IColumn> grouping in request.ColumnsToExtract.GroupBy(c => c.GetRuntimeName()).Where(g => g.Count() > 1))
    {
        notifier.OnCheckPerformed(new CheckEventArgs("There are " + grouping.Count() + " columns in the extract called '" + grouping.Key + "'", CheckResult.Fail));
    }

    //Make sure cohort and dataset are on same server before checking (can still get around this at runtime by using ExecuteCrossServerDatasetExtractionSource)
    //Server names are identifiers not linguistic text, so they are compared ordinally (culture rules could make equal names differ)
    if (!cohortServer.Server.Name.Equals(server.Name, StringComparison.OrdinalIgnoreCase) || !cohortServer.Server.DatabaseType.Equals(server.DatabaseType))
    {
        notifier.OnCheckPerformed(new CheckEventArgs(
            string.Format("Cohort is on server '{0}' ({1}) but dataset is on '{2}' ({3})",
                cohortServer.Server.Name,
                cohortServer.Server.DatabaseType,
                server.Name, server.DatabaseType), CheckResult.Warning));
    }
    else
    {
        //Try to fetch TOP 1 data
        try
        {
            using (var con = server.BeginNewTransactedConnection())
            {
                //incase user somehow manages to write a filter/transform that nukes data or something

                DbCommand cmd;

                try
                {
                    cmd = server.GetCommand(request.QueryBuilder.SQL, con);
                    cmd.CommandTimeout = timeout;
                    notifier.OnCheckPerformed(
                        new CheckEventArgs(
                            "/*About to send Request SQL :*/" + Environment.NewLine + request.QueryBuilder.SQL,
                            CheckResult.Success));
                }
                catch (QueryBuildingException e)
                {
                    notifier.OnCheckPerformed(new CheckEventArgs("Failed to assemble query for dataset " + ds,
                        CheckResult.Fail, e));
                    return;
                }

                try
                {
                    //DbCommand is IDisposable; release it as soon as the reader has been consumed
                    using (cmd)
                    using (var r = cmd.ExecuteReader())
                    {
                        if (r.Read())
                        {
                            notifier.OnCheckPerformed(new CheckEventArgs("Read at least 1 row successfully from dataset " + ds,
                                CheckResult.Success));
                        }
                        else
                        {
                            notifier.OnCheckPerformed(new CheckEventArgs("Dataset " + ds + " is completely empty (when linked with the cohort). " +
                                                                         "Extraction may fail if the Source does not allow empty extractions",
                                CheckResult.Warning));
                        }
                    }
                }
                catch (Exception e)
                {
                    //a timeout is only a warning, any other failure means the query itself could not be executed
                    if (server.GetQuerySyntaxHelper().IsTimeout(e))
                    {
                        notifier.OnCheckPerformed(new CheckEventArgs("Failed to read rows after " + timeout + "s", CheckResult.Warning, e));
                    }
                    else
                    {
                        notifier.OnCheckPerformed(new CheckEventArgs("Failed to execute the query (See below for query)", CheckResult.Fail, e));
                    }
                }

                //the transaction is never committed
                con.ManagedTransaction.AbandonAndCloseConnection();
            }
        }
        catch (Exception e)
        {
            notifier.OnCheckPerformed(new CheckEventArgs("Failed to execute Top 1 on dataset " + ds, CheckResult.Fail, e));
        }
    }

    var cata = ds.Catalogue;
    var fetchOptions = _checkGlobals ? FetchOptions.ExtractableGlobalsAndLocals : FetchOptions.ExtractableLocals;

    foreach (var supportingDocument in cata.GetAllSupportingDocuments(fetchOptions))
    {
        new SupportingDocumentsFetcher(supportingDocument).Check(notifier);
    }

    //check the supporting SQL tables selected by the same fetch options
    foreach (SupportingSQLTable table in cata.GetAllSupportingSQLTablesForCatalogue(fetchOptions))
    {
        new SupportingSQLTableChecker(table).Check(notifier);
    }

    //optionally check the pipeline that would be used for the extraction
    if (_alsoCheckPipeline != null)
    {
        var engine = new ExtractionPipelineUseCase(request.Project, request, _alsoCheckPipeline, DataLoadInfo.Empty)
            .GetEngine(_alsoCheckPipeline, new FromCheckNotifierToDataLoadEventListener(notifier));
        engine.Check(notifier);
    }
}
/// <summary>
/// Runs an extraction of the main dataset in which a deployed filter references a column
/// ("SuperSecretThing") of the project specific catalogue, and verifies that TestTable.csv
/// contains only the header plus the single row expected to pass the filter.
/// </summary>
public void Extract_ProjectSpecificCatalogue_FilterReference()
{
    // turn the custom catalogue into a project specific one
    CustomExtractableDataSet.Project_ID = _project.ID;
    CustomExtractableDataSet.SaveToDatabase();

    var pipeline = SetupPipeline();
    pipeline.Name = "Extract_ProjectSpecificCatalogue_FilterReference Pipe";
    pipeline.SaveToDatabase();

    // give the selected dataset a root container holding the filter under test
    var container = new FilterContainer(DataExportRepository);
    _selectedDataSet.RootFilterContainer_ID = container.ID;
    _selectedDataSet.SaveToDatabase();

    var monkeysOnly = new DeployedExtractionFilter(DataExportRepository, "monkeys only", container)
    {
        WhereSQL = "SuperSecretThing = 'monkeys can all secretly fly'"
    };
    monkeysOnly.SaveToDatabase();
    container.AddChild(monkeysOnly);

    // start from a clean slate: drop every join already known to the catalogue repository
    foreach (JoinInfo existingJoin in CatalogueRepository.GetAllObjects<JoinInfo>())
    {
        existingJoin.DeleteInDatabase();
    }

    // declare how the main table joins onto the custom table
    var coreColumns = _extractableDataSet.Catalogue.GetAllExtractionInformation(ExtractionCategory.Core);
    var mainIdentifier = coreColumns.Single(col => col.IsExtractionIdentifier).ColumnInfo;

    var projectSpecificColumns = CustomCatalogue.GetAllExtractionInformation(ExtractionCategory.ProjectSpecific);
    var customIdentifier = projectSpecificColumns.Single(col => col.GetRuntimeName().Equals("PrivateID")).ColumnInfo;

    new JoinInfo(CatalogueRepository, mainIdentifier, customIdentifier, ExtractionJoinType.Left, null);

    // force the custom table into the query for this selected dataset
    new SelectedDataSetsForcedJoin(DataExportRepository, _selectedDataSet, CustomTableInfo);

    // build a fresh request now that the filter and joins are in place
    _request = new ExtractDatasetCommand(_configuration, new ExtractableDatasetBundle(_extractableDataSet));

    // reset the live table to two known rows
    var testTable = Database.ExpectTable("TestTable");
    testTable.Truncate();

    using (var bulkInsert = testTable.BeginBulkInsert())
    {
        var rows = new DataTable();

        foreach (var columnName in new[] { "PrivateID", "Name", "DateOfBirth" })
        {
            rows.Columns.Add(columnName);
        }

        rows.Rows.Add("Priv_12345", "Bob", "2001-01-01");
        rows.Rows.Add("Priv_wtf11", "Frank", "2001-10-29");

        bulkInsert.Upload(rows);
    }

    ExtractionPipelineUseCase useCase;
    IExecuteDatasetExtractionDestination results;
    Execute(out useCase, out results);

    var extractedFiles = results.DirectoryPopulated.GetFiles();
    var mainDataTableCsv = extractedFiles.Single(file => file.Name.Equals("TestTable.csv"));
    Assert.IsNotNull(mainDataTableCsv);

    var fileLines = File.ReadAllLines(mainDataTableCsv.FullName);

    // header, then the one expected row, and nothing else
    Assert.AreEqual("ReleaseID,Name,DateOfBirth", fileLines[0]);
    Assert.AreEqual("Pub_54321,Bob,2001-01-01", fileLines[1]);
    Assert.AreEqual(2, fileLines.Length);
}