/// <summary>
/// Builds the rows for the <c>Fact_Download</c> table by linking every entry of
/// <paramref name="sourceData"/> with the IDs of its dimensions (date, time, package,
/// operation, platform, client and project type).
/// </summary>
/// <param name="sourceData">The package statistics to convert into fact rows.</param>
/// <param name="logFileName">
/// Name of the log file being processed; it is passed to the dimension retrieval calls,
/// to the telemetry calls and to every fact row.
/// </param>
/// <returns>
/// The data table obtained from <c>DataImporter.GetDataTableAsync("Fact_Download")</c>,
/// filled with the created fact rows. Entries whose package cannot be resolved, neither
/// directly nor through a package translation, are reported via
/// <c>ApplicationInsights.TrackPackageNotFound</c> and produce no rows.
/// </returns>
public async Task<DataTable> CreateAsync(IReadOnlyCollection<PackageStatistics> sourceData, string logFileName)
{
    var stopwatch = Stopwatch.StartNew();

    // insert any new dimension data first
    if (_times == null)
    {
        // this call is only needed once in the lifetime of the service
        _times = await GetDimension("time", logFileName, connection => RetrieveTimeDimensions(connection));
    }

    // NOTE(review): Min/Max in the "date" lambda throw InvalidOperationException for an empty
    // sourceData when the lambda is invoked - confirm callers never pass an empty collection.
    var packagesTask = GetDimension("package", logFileName, connection => RetrievePackageDimensions(sourceData, connection));
    var operationsTask = GetDimension("operation", logFileName, connection => RetrieveOperationDimensions(sourceData, connection));
    var projectTypesTask = GetDimension("project type", logFileName, connection => RetrieveProjectTypeDimensions(sourceData, connection));
    var clientsTask = GetDimension("client", logFileName, connection => RetrieveClientDimensions(sourceData, connection));
    var platformsTask = GetDimension("platform", logFileName, connection => RetrievePlatformDimensions(sourceData, connection));
    var datesTask = GetDimension("date", logFileName, connection => RetrieveDateDimensions(connection, sourceData.Min(e => e.EdgeServerTimeDelivered), sourceData.Max(e => e.EdgeServerTimeDelivered)));
    var packageTranslationsTask = GetDimension("package translations", logFileName, connection => RetrievePackageTranslations(sourceData, connection));

    await Task.WhenAll(operationsTask, projectTypesTask, clientsTask, platformsTask, datesTask, packagesTask, packageTranslationsTask);

    // Every task has already completed here, so these awaits only unwrap the results.
    var operations = await operationsTask;
    var projectTypes = await projectTypesTask;
    var clients = await clientsTask;
    var platforms = await platformsTask;
    var dates = await datesTask;
    var packages = await packagesTask;
    var packageTranslations = await packageTranslationsTask;

    // create facts data rows by linking source data with dimensions
    var dataImporter = new DataImporter(_targetDatabase);
    var dataTable = await dataImporter.GetDataTableAsync("Fact_Download");

    // If no project type dimension data is available at all, every element gets exactly one
    // row with the unknown project type. The operation, platform and client dimensions need
    // no such flag: a lookup in an empty dimension falls back to the unknown ID on its own.
    var projectTypesAvailable = projectTypes.Any();

    Trace.WriteLine("Creating facts...");
    foreach (var groupedByPackageId in sourceData.GroupBy(e => e.PackageId, StringComparer.OrdinalIgnoreCase))
    {
        var packagesForId = packages
            .Where(e => string.Equals(e.PackageId, groupedByPackageId.Key, StringComparison.OrdinalIgnoreCase))
            .ToList();

        foreach (var groupedByPackageIdAndVersion in groupedByPackageId.GroupBy(e => e.PackageVersion, StringComparer.OrdinalIgnoreCase))
        {
            int packageId;
            var package = packagesForId.FirstOrDefault(e => string.Equals(e.PackageVersion, groupedByPackageIdAndVersion.Key, StringComparison.OrdinalIgnoreCase));
            if (package == null)
            {
                // This package id and version could not be 100% accurately parsed from the CDN Request URL,
                // likely due to weird package ID which could be interpreted as a version string.
                // Look for a mapping in the support table in an attempt to auto-correct this entry.
                // The comparison ignores case, like every other package id/version comparison here.
                var packageTranslation = packageTranslations.FirstOrDefault(t =>
                    string.Equals(t.IncorrectPackageId, groupedByPackageId.Key, StringComparison.OrdinalIgnoreCase)
                    && string.Equals(t.IncorrectPackageVersion, groupedByPackageIdAndVersion.Key, StringComparison.OrdinalIgnoreCase));

                if (packageTranslation == null)
                {
                    // Track it in Application Insights.
                    ApplicationInsights.TrackPackageNotFound(groupedByPackageId.Key, groupedByPackageIdAndVersion.Key, logFileName);
                    continue;
                }

                // there seems to be a mapping
                packageId = packageTranslation.CorrectedPackageId;
            }
            else
            {
                packageId = package.Id;
            }

            foreach (var element in groupedByPackageIdAndVersion)
            {
                // required dimensions
                var dateId = dates.First(e => e.Date.Equals(element.EdgeServerTimeDelivered.Date)).Id;
                var timeId = _times.First(e => e.HourOfDay == element.EdgeServerTimeDelivered.Hour).Id;

                // Dimensions that could be "(unknown)". They are resolved for each element on its
                // own, so that a value found for one element never leaks into the following ones.
                // A null key also maps to unknown, because ContainsKey would throw for it.
                int operationId = element.Operation != null && operations.ContainsKey(element.Operation)
                    ? operations[element.Operation]
                    : DimensionId.Unknown;

                int platformId = element.UserAgent != null && platforms.ContainsKey(element.UserAgent)
                    ? platforms[element.UserAgent]
                    : DimensionId.Unknown;

                int clientId = element.UserAgent != null && clients.ContainsKey(element.UserAgent)
                    ? clients[element.UserAgent]
                    : DimensionId.Unknown;

                // One fact row is written per project type of the element.
                var projectTypeIds = new List<int>();
                if (projectTypesAvailable && element.ProjectGuids != null)
                {
                    foreach (var projectGuid in element.ProjectGuids.Split(new[] { ";" }, StringSplitOptions.RemoveEmptyEntries))
                    {
                        projectTypeIds.Add(projectTypes.ContainsKey(projectGuid)
                            ? projectTypes[projectGuid]
                            : DimensionId.Unknown);
                    }
                }

                if (projectTypeIds.Count == 0)
                {
                    // No project type information: still record the element, with the unknown project type.
                    projectTypeIds.Add(DimensionId.Unknown);
                }

                foreach (var projectTypeId in projectTypeIds)
                {
                    var dataRow = dataTable.NewRow();
                    FillDataRow(dataRow, dateId, timeId, packageId, operationId, platformId, projectTypeId, clientId, logFileName, element.UserAgent);
                    dataTable.Rows.Add(dataRow);
                }
            }
        }
    }

    stopwatch.Stop();
    Trace.Write("  DONE (" + dataTable.Rows.Count + " records, " + stopwatch.ElapsedMilliseconds + "ms)");
    ApplicationInsights.TrackMetric("Blob record count", dataTable.Rows.Count, logFileName);

    return dataTable;
}