/// <summary>
/// Reads the next batch of extraction data.  In the globals case (<see cref="GlobalsRequest"/> not null) returns a
/// single empty <see cref="DataTable"/> on the first call (then null) so the destination is triggered to extract the
/// global docs/sql.  Otherwise hosts a <see cref="DbDataCommandDataFlowSource"/> streaming rows for <see cref="Request"/>,
/// optionally applying in-memory DISTINCT, per-chunk validation, date bucketing and release-identifier auditing.
/// </summary>
/// <param name="listener">Receives progress and notification events raised while reading, validating and bucketing.</param>
/// <param name="cancellationToken">Checked after each read; an <see cref="Exception"/> is thrown if cancellation was requested.</param>
/// <returns>The next chunk of rows, or null when the data is exhausted (or on second and subsequent globals calls).</returns>
public virtual DataTable GetChunk(IDataLoadEventListener listener, GracefulCancellationToken cancellationToken)
{
    // we are in the Global Commands case, let's return an empty DataTable (not null)
    // so we can trigger the destination to extract the globals docs and sql
    if (GlobalsRequest != null)
    {
        GlobalsRequest.ElevateState(ExtractCommandState.WaitingForSQLServer);

        if (firstGlobalChunk)
        {
            //unless we are checking, start auditing
            StartAuditGlobals();

            firstGlobalChunk = false;
            return new DataTable(ExtractionDirectory.GLOBALS_DATA_NAME);
        }

        return null;
    }

    if (Request == null)
        throw new Exception("Component has not been initialized before being asked to GetChunk(s)");

    Request.ElevateState(ExtractCommandState.WaitingForSQLServer);

    // a previous call saw a TaskCanceledException from the hosted source (see AggregateException catch below)
    if (_cancel)
        throw new Exception("User cancelled data extraction");

    // first call for this dataset: audit the query and spin up the hosted ADO source
    if (_hostedSource == null)
    {
        StartAudit(Request.QueryBuilder.SQL);

        if (Request.DatasetBundle.DataSet.DisableExtraction)
            throw new Exception("Cannot extract " + Request.DatasetBundle.DataSet + " because DisableExtraction is set to true");

        _hostedSource = new DbDataCommandDataFlowSource(GetCommandSQL(listener),
            "ExecuteDatasetExtraction " + Request.DatasetBundle.DataSet,
            Request.GetDistinctLiveDatabaseServer().Builder,
            ExecutionTimeout);

        // If we are running in batches then always allow empty extractions
        _hostedSource.AllowEmptyResultSets = AllowEmptyExtractions || Request.IsBatchResume;
        _hostedSource.BatchSize = BatchSize;
    }

    DataTable chunk = null;

    try
    {
        chunk = _hostedSource.GetChunk(listener, cancellationToken);

        // prepend any rows held back by the peeker on the previous call
        chunk = _peeker.AddPeekedRowsIfAny(chunk);

        //if we are trying to distinct the records in memory based on release id
        if (DistinctStrategy == DistinctStrategy.OrderByAndDistinctInMemory)
        {
            var releaseIdentifierColumn = Request.ReleaseIdentifierSubstitutions.First().GetRuntimeName();

            // BUGFIX: also require at least one row - an empty table (possible when AllowEmptyResultSets is
            // true, e.g. batch resume) previously threw IndexOutOfRangeException on Rows[Count - 1] below
            if (chunk != null && chunk.Rows.Count > 0)
            {
                //last release id in the current chunk
                var lastReleaseId = chunk.Rows[chunk.Rows.Count - 1][releaseIdentifierColumn];

                // pull forward every subsequent row sharing that release id so a given id is never split across chunks
                _peeker.AddWhile(_hostedSource, r => Equals(r[releaseIdentifierColumn], lastReleaseId), chunk);
                chunk = MakeDistinct(chunk, listener, cancellationToken);
            }
        }
    }
    catch (AggregateException a)
    {
        // remember cancellation so the next call throws "User cancelled data extraction"
        if (a.GetExceptionIfExists<TaskCanceledException>() != null)
            _cancel = true;

        throw;
    }
    catch (Exception e)
    {
        // deliberate best-effort: report the failure and fall through (chunk stays null => treated as data exhausted)
        listener.OnNotify(this, new NotifyEventArgs(ProgressEventType.Error, "Read from source failed", e));
    }

    if (cancellationToken.IsCancellationRequested)
        throw new Exception("Data read cancelled because our cancellationToken was set, aborting data reading");

    //if the first chunk is null
    if (firstChunk && chunk == null && !AllowEmptyExtractions)
        throw new Exception("There is no data to load, query returned no rows, query was:" + Environment.NewLine +
                            (_hostedSource.Sql ?? Request.QueryBuilder.SQL));

    //not the first chunk anymore
    firstChunk = false;

    //data exhausted
    if (chunk == null)
    {
        listener.OnNotify(this, new NotifyEventArgs(ProgressEventType.Information,
            "Data exhausted after reading " + _rowsRead + " rows of data (" + UniqueReleaseIdentifiersEncountered.Count + " unique release identifiers seen)"));

        // a batch resume cannot know the true overall distinct count, so -1 is recorded as a sentinel
        if (Request != null)
            Request.CumulativeExtractionResults.DistinctReleaseIdentifiersEncountered =
                Request.IsBatchResume ? -1 : UniqueReleaseIdentifiersEncountered.Count;

        return null;
    }

    _rowsRead += chunk.Rows.Count;

    //chunk will have datatypes for all the things in the buffer so we can populate our dictionary of facts about what columns/catalogue items have spontaneously changed name/type etc
    if (ExtractTimeTransformationsObserved == null)
        GenerateExtractionTransformObservations(chunk);

    //see if the SqlDataReader has a column with the same name as the ReleaseIdentifierSQL (if so then we can use it to count the number of distinct subjects written out to the csv)
    bool includesReleaseIdentifier = _extractionIdentifiersidx.Count > 0;

    _timeSpentValidating.Start();
    //build up the validation report (Missing/Wrong/Etc) - this has no mechanical effect on the extracted data just some metadata that goes into a flat file
    if (ExtractionTimeValidator != null && Request.IncludeValidation)
    {
        try
        {
            chunk.Columns.Add(ValidationColumnName);
            ExtractionTimeValidator.Validate(chunk, ValidationColumnName);

            _rowsValidated += chunk.Rows.Count;
            listener.OnProgress(this, new ProgressEventArgs("Validation", new ProgressMeasurement(_rowsValidated, ProgressType.Records), _timeSpentValidating.Elapsed));
        }
        catch (Exception ex)
        {
            // validation is advisory; record the failure and disable the validator for the rest of the extraction
            listener.OnNotify(this, new NotifyEventArgs(ProgressEventType.Error, "Could not validate data chunk", ex));
            ValidationFailureException = ex;
            ExtractionTimeValidator = null;
        }
    }
    _timeSpentValidating.Stop();

    _timeSpentBuckettingDates.Start();
    if (ExtractionTimeTimeCoverageAggregator != null)
    {
        _rowsBucketted += chunk.Rows.Count;

        foreach (DataRow row in chunk.Rows)
            ExtractionTimeTimeCoverageAggregator.ProcessRow(row);

        // BUGFIX: report the date-bucketing stopwatch here, not _timeSpentCalculatingDISTINCT (copy-paste error;
        // the validation and DISTINCT progress events each correctly report their own stopwatch)
        listener.OnProgress(this, new ProgressEventArgs("Bucketting Dates", new ProgressMeasurement(_rowsBucketted, ProgressType.Records), _timeSpentBuckettingDates.Elapsed));
    }
    _timeSpentBuckettingDates.Stop();

    _timeSpentCalculatingDISTINCT.Start();
    //record unique release identifiers found
    if (includesReleaseIdentifier)
        foreach (string idx in _extractionIdentifiersidx)
        {
            foreach (DataRow r in chunk.Rows)
            {
                if (r[idx] == DBNull.Value)
                {
                    if (_extractionIdentifiersidx.Count == 1)
                        throw new Exception("Null release identifier found in extract of dataset " + Request.DatasetBundle.DataSet);

                    continue; //there are multiple extraction identifiers thats fine if one or two are null
                }

                if (!UniqueReleaseIdentifiersEncountered.Contains(r[idx]))
                    UniqueReleaseIdentifiersEncountered.Add(r[idx]);
            }

            listener.OnProgress(this, new ProgressEventArgs("Calculating Distinct Release Identifiers", new ProgressMeasurement(UniqueReleaseIdentifiersEncountered.Count, ProgressType.Records), _timeSpentCalculatingDISTINCT.Elapsed));
        }
    _timeSpentCalculatingDISTINCT.Stop();

    return chunk;
}
public void RetrieveChunks()
{
    // Ask for the top 3 system tables so the expected row count is fixed regardless of server contents
    const string sql = "Select top 3 * from master.sys.tables";

    var source = new DbDataCommandDataFlowSource(
        sql,
        "Query Sys tables",
        DiscoveredServerICanCreateRandomDatabasesAndTablesOn.Builder,
        30);

    var chunk = source.GetChunk(new ThrowImmediatelyDataLoadEventListener(), new GracefulCancellationToken());

    Assert.AreEqual(3, chunk.Rows.Count);
}