/// <summary>
/// Merges the rows of <paramref name="tableLoadResultToMerge"/> into
/// <paramref name="latestTableLoadResult"/> and reports how many rows were added and updated.
/// </summary>
/// <param name="latestTableLoadResult">Table that receives the new and updated rows; it is modified in place.</param>
/// <param name="tableLoadResultToMerge">Table whose rows are merged into the latest table.</param>
/// <returns>The table definition name together with the new-row and updated-row counts.</returns>
private DatasetDataTableMergeResult Merge(TableLoadResult latestTableLoadResult,
                                          TableLoadResult tableLoadResultToMerge)
        {
            // Both sets are materialised up front, before the latest table is touched,
            // so the mutations below cannot influence which rows were selected.
            IEnumerable<RowLoadResult> rowsToAdd =
                tableLoadResultToMerge.GetRowsMissingFrom(latestTableLoadResult).ToArray();
            IEnumerable<RowLoadResult> rowsToUpdate =
                tableLoadResultToMerge.GetRowsWhereFieldsDifferFromMatchIn(latestTableLoadResult).ToArray();

            latestTableLoadResult.UpdateMatchingRowsWithFieldsValuesFrom(rowsToUpdate);
            latestTableLoadResult.AddRows(rowsToAdd);

            DatasetDataTableMergeResult tableMergeResult = new DatasetDataTableMergeResult();
            tableMergeResult.TableDefinitionName = latestTableLoadResult.TableDefinition.Name;
            tableMergeResult.NewRowsCount = rowsToAdd.Count();
            tableMergeResult.UpdatedRowsCount = rowsToUpdate.Count();

            return tableMergeResult;
        }
Esempio n. 2
0
        /// <summary>
        /// Resolves the requested version of <paramref name="dataset"/>, its dataset definition and the
        /// build project for the specification, loads the table data from the version's blob and
        /// passes it to <c>PersistDataset</c>.
        /// </summary>
        /// <param name="dataset">Dataset whose history contains the version to process.</param>
        /// <param name="specificationId">Specification used to look up the build project.</param>
        /// <param name="relationshipId">Relationship id forwarded to <c>PersistDataset</c>.</param>
        /// <param name="version">Version number to locate in <c>dataset.History</c>.</param>
        /// <param name="user">Author reference forwarded to <c>PersistDataset</c>.</param>
        /// <returns>The build project found for <paramref name="specificationId"/>.</returns>
        /// <exception cref="NonRetriableException">
        /// Thrown when the dataset version, the dataset definition, the build project or the
        /// table load result cannot be found.
        /// </exception>
        private async Task<BuildProject> ProcessDataset(Dataset dataset, string specificationId, string relationshipId, int version, Reference user)
        {
            string dataDefinitionId = dataset.Definition.Id;

            DatasetVersion datasetVersion = dataset.History.SingleOrDefault(v => v.Version == version);

            if (datasetVersion == null)
            {
                // Arguments follow the placeholder order of the template: name, id, version.
                _logger.Error("Dataset version not found for dataset '{name}' ({id}) version '{version}'", dataset.Name, dataset.Id, version);
                throw new NonRetriableException($"Dataset version not found for dataset '{dataset.Name}' ({dataset.Id}) version '{version}'");
            }

            string fullBlobName = datasetVersion.BlobName;

            DatasetDefinition datasetDefinition =
                (await _datasetRepository.GetDatasetDefinitionsByQuery(m => m.Id == dataDefinitionId))?.FirstOrDefault();

            if (datasetDefinition == null)
            {
                string definitionNotFoundMessage = $"Unable to find a data definition for id: {dataDefinitionId}, for blob: {fullBlobName}";

                _logger.Error(definitionNotFoundMessage);

                throw new NonRetriableException(definitionNotFoundMessage);
            }

            BuildProject buildProject = await _calcsRepository.GetBuildProjectBySpecificationId(specificationId);

            if (buildProject == null)
            {
                _logger.Error($"Unable to find a build project for specification id: {specificationId}");

                throw new NonRetriableException($"Unable to find a build project for id: {specificationId}");
            }

            TableLoadResult loadResult = await GetTableResult(fullBlobName, datasetDefinition);

            if (loadResult == null)
            {
                const string loadFailedMessage = "Failed to load table result";

                _logger.Error(loadFailedMessage);

                throw new NonRetriableException(loadFailedMessage);
            }

            await PersistDataset(loadResult, dataset, datasetDefinition, buildProject, specificationId, relationshipId, version, user);

            return buildProject;
        }
Esempio n. 3
0
        /// <summary>
        /// Builds one <see cref="ProviderSourceDataset"/> per provider from the loaded rows, compares
        /// each with the currently stored dataset for the same specification and relationship, and
        /// saves whatever changed.
        /// </summary>
        /// <remarks>
        /// Steps, in order: group rows by provider id; create a new version for providers whose rows
        /// differ from the stored ones (or that have no stored dataset); update the current datasets;
        /// when the cleanup feature toggle is enabled, delete stored datasets for providers that no
        /// longer appear and send cleanup messages; save the new versions to history; regenerate
        /// dataset aggregations; remove the cached aggregations for the specification; repopulate
        /// provider summaries.
        /// Returns early (after logging an error) when the build project has no dataset relationships
        /// or none matching <paramref name="relationshipId"/>.
        /// </remarks>
        /// <param name="loadResult">Rows loaded from the dataset file.</param>
        /// <param name="dataset">Dataset the rows belong to; used for the version reference.</param>
        /// <param name="datasetDefinition">Definition used to resolve provider ids, field ids and aggregations.</param>
        /// <param name="buildProject">Build project whose dataset relationships are searched.</param>
        /// <param name="specificationId">Specification the source datasets are stored against.</param>
        /// <param name="relationshipId">Id of the relationship to persist data for; must not be null or whitespace.</param>
        /// <param name="version">Dataset version recorded on newly created source dataset versions.</param>
        /// <param name="user">Author recorded on created versions.</param>
        private async Task PersistDataset(TableLoadResult loadResult, Dataset dataset, DatasetDefinition datasetDefinition, BuildProject buildProject, string specificationId, string relationshipId, int version, Reference user)
        {
            // Validate the argument before any remote call is made.
            Guard.IsNullOrWhiteSpace(relationshipId, nameof(relationshipId));

            IEnumerable<ProviderSummary> providerSummaries = await _providerService.FetchCoreProviderData();

            if (buildProject.DatasetRelationships == null)
            {
                _logger.Error($"No dataset relationships found for build project with id : '{buildProject.Id}' for specification '{specificationId}'");
                return;
            }

            DatasetRelationshipSummary relationshipSummary = buildProject.DatasetRelationships.FirstOrDefault(m => m.Relationship.Id == relationshipId);

            if (relationshipSummary == null)
            {
                _logger.Error($"No dataset relationship found for build project with id : {buildProject.Id} with data definition id {datasetDefinition.Id} and relationshipId '{relationshipId}'");
                return;
            }

            // Index the currently stored datasets by provider id. Entries still present after the
            // comparison pass below belong to providers that are missing from the new data.
            ConcurrentDictionary<string, ProviderSourceDataset> existingCurrent = new ConcurrentDictionary<string, ProviderSourceDataset>();

            IEnumerable<ProviderSourceDataset> existingCurrentDatasets = await _providerResultsRepositoryPolicy.ExecuteAsync(() =>
                _providersResultsRepository.GetCurrentProviderSourceDatasets(specificationId, relationshipId));

            if (existingCurrentDatasets.AnyWithNullCheck())
            {
                foreach (ProviderSourceDataset currentDataset in existingCurrentDatasets)
                {
                    existingCurrent.TryAdd(currentDataset.ProviderId, currentDataset);
                }
            }

            ConcurrentDictionary<string, ProviderSourceDataset> resultsByProviderId = new ConcurrentDictionary<string, ProviderSourceDataset>();

            ConcurrentDictionary<string, ProviderSourceDataset> updateCurrentDatasets = new ConcurrentDictionary<string, ProviderSourceDataset>();

            // Creates the empty, draft, version-1 source dataset for a provider seen for the first time.
            ProviderSourceDataset CreateSourceDataset(string providerId)
            {
                ProviderSourceDataset created = new ProviderSourceDataset
                {
                    DataGranularity = relationshipSummary.DataGranularity,
                    SpecificationId = specificationId,
                    DefinesScope = relationshipSummary.DefinesScope,
                    DataRelationship = new Reference(relationshipSummary.Relationship.Id, relationshipSummary.Relationship.Name),
                    DatasetRelationshipSummary = new Reference(relationshipSummary.Id, relationshipSummary.Name),
                    ProviderId = providerId
                };

                created.Current = new ProviderSourceDatasetVersion
                {
                    Rows = new List<Dictionary<string, object>>(),
                    Dataset = new VersionReference(dataset.Id, dataset.Name, version),
                    Date = DateTimeOffset.Now.ToLocalTime(),
                    ProviderId = providerId,
                    Version = 1,
                    PublishStatus = Models.Versioning.PublishStatus.Draft,
                    ProviderSourceDatasetId = created.Id,
                    Author = user
                };

                return created;
            }

            // NOTE(review): rows are processed in parallel, so the order of rows within a provider's
            // dataset is not deterministic; the JSON comparison further down is order-sensitive.
            Parallel.ForEach(loadResult.Rows, (RowLoadResult row) =>
            {
                IEnumerable<string> allProviderIds = GetProviderIdsForIdentifier(datasetDefinition, row, providerSummaries);

                foreach (string providerId in allProviderIds)
                {
                    // GetOrAdd may run the factory on more than one thread for the same key, but only
                    // one instance is ever stored and returned to every caller.
                    ProviderSourceDataset sourceDataset = resultsByProviderId.GetOrAdd(providerId, CreateSourceDataset);

                    if (_featureToggle.IsUseFieldDefinitionIdsInSourceDatasetsEnabled())
                    {
                        // Re-key the row by field definition id instead of field name.
                        Dictionary<string, object> fieldsByDefinitionId = new Dictionary<string, object>();

                        foreach (KeyValuePair<string, object> rowField in row.Fields)
                        {
                            foreach (TableDefinition tableDefinition in datasetDefinition.TableDefinitions)
                            {
                                FieldDefinition fieldDefinition = tableDefinition.FieldDefinitions.FirstOrDefault(m => m.Name == rowField.Key);
                                if (fieldDefinition != null)
                                {
                                    fieldsByDefinitionId.Add(fieldDefinition.Id, rowField.Value);
                                }
                            }
                        }

                        // Several rows can map to the same provider and be handled on different
                        // threads; List<T>.Add is not thread-safe, so writes are serialised per dataset.
                        lock (sourceDataset)
                        {
                            sourceDataset.DataDefinitionId = relationshipSummary.DatasetDefinition.Id;
                            sourceDataset.Current.Rows.Add(fieldsByDefinitionId);
                        }
                    }
                    else
                    {
                        lock (sourceDataset)
                        {
                            sourceDataset.DataDefinition = new Reference(relationshipSummary.DatasetDefinition.Id, relationshipSummary.DatasetDefinition.Name);
                            sourceDataset.Current.Rows.Add(row.Fields);
                        }
                    }
                }
            });

            ConcurrentBag<ProviderSourceDatasetVersion> historyToSave = new ConcurrentBag<ProviderSourceDatasetVersion>();

            List<Task> historySaveTasks = new List<Task>(resultsByProviderId.Count);

            // Limits the number of version-creation tasks running at once to 15.
            SemaphoreSlim throttler = new SemaphoreSlim(initialCount: 15);

            foreach (KeyValuePair<string, ProviderSourceDataset> providerSourceDataset in resultsByProviderId)
            {
                await throttler.WaitAsync();

                historySaveTasks.Add(
                    Task.Run(async () =>
                    {
                        try
                        {
                            string providerId = providerSourceDataset.Key;
                            ProviderSourceDataset sourceDataset = providerSourceDataset.Value;

                            if (existingCurrent.TryGetValue(providerId, out ProviderSourceDataset existingDataset))
                            {
                                string existingDatasetJson = JsonConvert.SerializeObject(existingDataset.Current.Rows);
                                string latestDatasetJson = JsonConvert.SerializeObject(sourceDataset.Current.Rows);

                                // Only create a new version when the row data actually changed.
                                if (existingDatasetJson != latestDatasetJson)
                                {
                                    ProviderSourceDatasetVersion newVersion = existingDataset.Current.Clone() as ProviderSourceDatasetVersion;

                                    newVersion = await _sourceDatasetsVersionRepository.CreateVersion(newVersion, existingDataset.Current, providerId);
                                    newVersion.Author = user;
                                    newVersion.Rows = sourceDataset.Current.Rows;

                                    sourceDataset.Current = newVersion;

                                    updateCurrentDatasets.TryAdd(providerId, sourceDataset);

                                    historyToSave.Add(newVersion);
                                }

                                // The provider is still present, so it must not be treated as missing later.
                                existingCurrent.TryRemove(providerId, out _);
                            }
                            else
                            {
                                // No stored dataset for this provider: the freshly built version is the first one.
                                updateCurrentDatasets.TryAdd(providerId, sourceDataset);

                                historyToSave.Add(sourceDataset.Current);
                            }
                        }
                        finally
                        {
                            throttler.Release();
                        }
                    }));
            }

            await TaskHelper.WhenAllAndThrow(historySaveTasks.ToArray());

            if (updateCurrentDatasets.Count > 0)
            {
                _logger.Information($"Saving {updateCurrentDatasets.Count} updated source datasets");

                await _providerResultsRepositoryPolicy.ExecuteAsync(() =>
                    _providersResultsRepository.UpdateCurrentProviderSourceDatasets(updateCurrentDatasets.Values));
            }

            // Whatever is left in existingCurrent was stored previously but is absent from the new data.
            if (_featureToggle.IsProviderResultsSpecificationCleanupEnabled() && !existingCurrent.IsEmpty)
            {
                _logger.Information($"Removing {existingCurrent.Count} missing source datasets");

                await _providerResultsRepositoryPolicy.ExecuteAsync(() =>
                    _providersResultsRepository.DeleteCurrentProviderSourceDatasets(existingCurrent.Values));

                foreach (IEnumerable<ProviderSourceDataset> providerSourceDataSets in existingCurrent.Values.Partition<ProviderSourceDataset>(1000))
                {
                    await SendProviderSourceDatasetCleanupMessageToTopic(specificationId, ServiceBusConstants.TopicNames.ProviderSourceDatasetCleanup, providerSourceDataSets);
                }
            }

            if (!historyToSave.IsEmpty)
            {
                _logger.Information($"Saving {historyToSave.Count} items to history");
                await _sourceDatasetsVersionRepository.SaveVersions(historyToSave);
            }

            Reference relationshipReference = new Reference(relationshipSummary.Relationship.Id, relationshipSummary.Relationship.Name);

            DatasetAggregations datasetAggregations = GenerateAggregations(datasetDefinition, loadResult, specificationId, relationshipReference);

            if (!datasetAggregations.Fields.IsNullOrEmpty())
            {
                await _datasetsAggregationsRepository.CreateDatasetAggregations(datasetAggregations);
            }

            await _cacheProvider.RemoveAsync<List<CalculationAggregation>>($"{CacheKeys.DatasetAggregationsForSpecification}{specificationId}");

            await PopulateProviderSummariesForSpecification(specificationId, providerSummaries);
        }
Esempio n. 4
0
        /// <summary>
        /// Computes sum, average, min and max for every field that is both aggregable and numeric,
        /// across all rows of <paramref name="tableLoadResult"/>.
        /// </summary>
        /// <remarks>
        /// The set of fields considered is taken from the first row only. Fields without a matching
        /// field definition are skipped. A null field value counts as 0 in every aggregate.
        /// </remarks>
        /// <param name="datasetDefinition">Definition whose table definitions supply the field definitions.</param>
        /// <param name="tableLoadResult">Loaded rows to aggregate.</param>
        /// <param name="specificationId">Specification id recorded on the result.</param>
        /// <param name="datasetRelationship">Relationship whose id is recorded on the result and whose name prefixes field identifiers.</param>
        /// <returns>
        /// Aggregations with four entries (Sum, Average, Min, Max) per qualifying field; the field
        /// collection is empty when there are no rows or no qualifying fields.
        /// </returns>
        private DatasetAggregations GenerateAggregations(DatasetDefinition datasetDefinition, TableLoadResult tableLoadResult, string specificationId, Reference datasetRelationship)
        {
            List<AggregatedField> aggregatedFields = new List<AggregatedField>();

            string identifierPrefix = $"Datasets.{DatasetTypeGenerator.GenerateIdentifier(datasetRelationship.Name)}";

            RowLoadResult firstRow = tableLoadResult.Rows.FirstOrDefault();

            if (firstRow != null)
            {
                // Flatten once instead of re-running the SelectMany for every field of the row.
                List<FieldDefinition> fieldDefinitions = datasetDefinition.TableDefinitions.SelectMany(m => m.FieldDefinitions).ToList();

                foreach (KeyValuePair<string, object> field in firstRow.Fields)
                {
                    FieldDefinition fieldDefinition = fieldDefinitions.FirstOrDefault(m => m.Name == field.Key);

                    // A column with no matching definition cannot be aggregated; skip it rather
                    // than dereference null.
                    if (fieldDefinition == null)
                    {
                        continue;
                    }

                    if (!fieldDefinition.IsAggregable || !fieldDefinition.IsNumeric)
                    {
                        continue;
                    }

                    string fieldName = fieldDefinition.Name;

                    string identifierName = $"{identifierPrefix}.{DatasetTypeGenerator.GenerateIdentifier(fieldName)}";

                    // Convert the column's values once and reuse them for all four aggregates.
                    // The first row contains this field, so the array is never empty here.
                    decimal[] values = tableLoadResult.Rows
                        .SelectMany(m => m.Fields.Where(f => f.Key == fieldName))
                        .Select(s => s.Value != null ? Convert.ToDecimal(s.Value) : 0)
                        .ToArray();

                    aggregatedFields.Add(new AggregatedField
                    {
                        FieldDefinitionName = identifierName,
                        FieldType = AggregatedType.Sum,
                        Value = values.Sum()
                    });

                    aggregatedFields.Add(new AggregatedField
                    {
                        FieldDefinitionName = identifierName,
                        FieldType = AggregatedType.Average,
                        Value = values.Average()
                    });

                    aggregatedFields.Add(new AggregatedField
                    {
                        FieldDefinitionName = identifierName,
                        FieldType = AggregatedType.Min,
                        Value = values.Min()
                    });

                    aggregatedFields.Add(new AggregatedField
                    {
                        FieldDefinitionName = identifierName,
                        FieldType = AggregatedType.Max,
                        Value = values.Max()
                    });
                }
            }

            return new DatasetAggregations
            {
                SpecificationId = specificationId,
                DatasetRelationshipId = datasetRelationship.Id,
                Fields = aggregatedFields
            };
        }
        /// <summary>
        /// Reads two Excel dataset files, merges the tables of <paramref name="blobFileNameToMerge"/>
        /// into the matching tables of <paramref name="latestBlobFileName"/>, and, when anything
        /// changed, writes the merged data back over the blob named by <paramref name="blobFileNameToMerge"/>.
        /// </summary>
        /// <param name="datasetDefinition">Definition used to read and write the Excel data.</param>
        /// <param name="latestBlobFileName">Blob holding the latest dataset data.</param>
        /// <param name="blobFileNameToMerge">Blob holding the data to merge; also the upload target.</param>
        /// <returns>
        /// One merge result per table of the latest file. <c>ErrorMessage</c> is set when either file
        /// could not be read or when the upload failed.
        /// </returns>
        public async Task<DatasetDataMergeResult> Merge(DatasetDefinition datasetDefinition,
                                                        string latestBlobFileName,
                                                        string blobFileNameToMerge)
        {
            DatasetDataMergeResult mergeResult = new DatasetDataMergeResult();

            (bool latestRead, string latestError, List<TableLoadResult> latestTables) =
                await ReadExcelDatasetData(datasetDefinition, latestBlobFileName);

            if (!latestRead)
            {
                mergeResult.ErrorMessage = latestError;
                _logger.Error(latestError);
                return mergeResult;
            }

            (bool incomingRead, string incomingError, List<TableLoadResult> incomingTables) =
                await ReadExcelDatasetData(datasetDefinition, blobFileNameToMerge);

            if (!incomingRead)
            {
                mergeResult.ErrorMessage = incomingError;
                _logger.Error(incomingError);
                return mergeResult;
            }

            foreach (TableLoadResult latestTable in latestTables)
            {
                TableLoadResult incomingTable = incomingTables.FirstOrDefault(x => x.TableDefinition?.Name == latestTable.TableDefinition.Name);

                bool nothingToMerge = incomingTable == null || !incomingTable.Rows.Any();

                // The table-level Merge updates latestTable in place with the incoming table's data;
                // a table with nothing to merge is reported with its name only.
                DatasetDataTableMergeResult tableMergeResult = nothingToMerge
                    ? new DatasetDataTableMergeResult { TableDefinitionName = latestTable.TableDefinition.Name }
                    : Merge(latestTable, incomingTable);

                mergeResult.TablesMergeResults.Add(tableMergeResult);
            }

            if (!mergeResult.HasChanges)
            {
                return mergeResult;
            }

            // There are new or updated rows, so the merge file is replaced with the merged data
            // (the incoming rows folded into the latest, previous-version dataset).
            byte[] mergedWorkbook = _excelDatasetWriter.Write(datasetDefinition, latestTables);

            ICloudBlob targetBlob = await _blobClient.GetBlobReferenceFromServerAsync(blobFileNameToMerge);

            try
            {
                await using MemoryStream uploadStream = new MemoryStream(mergedWorkbook);
                await _blobClientPolicy.ExecuteAsync(() => targetBlob.UploadFromStreamAsync(uploadStream));
            }
            catch (Exception ex)
            {
                // An upload failure is reported through the result rather than rethrown.
                mergeResult.ErrorMessage = $"Failed to upload {datasetDefinition.Name} to blob storage after merge.";
                _logger.Error(ex, mergeResult.ErrorMessage);
            }

            return mergeResult;
        }