コード例 #1
0
ファイル: Warehouse.cs プロジェクト: girish/NuGet.Jobs
        /// <summary>
        /// Builds the download fact rows for one log file by linking every source record
        /// to the IDs of its date, time, package, operation, platform, client and project type dimensions.
        /// </summary>
        /// <param name="sourceData">The package statistics records to turn into fact rows.</param>
        /// <param name="logFileName">
        /// Name of the log file being processed; forwarded to the dimension lookups,
        /// to the Application Insights tracking calls and to <c>FillDataRow</c>.
        /// </param>
        /// <returns>
        /// The data table obtained for "Fact_Download", filled with one row per source record and project type
        /// (a single row with the unknown project type when a record carries no project GUIDs).
        /// Records whose package cannot be resolved, directly or through a package translation, are skipped.
        /// </returns>
        public async Task <DataTable> CreateAsync(IReadOnlyCollection <PackageStatistics> sourceData, string logFileName)
        {
            var stopwatch = Stopwatch.StartNew();

            // insert any new dimension data first
            if (_times == null)
            {
                // this call is only needed once in the lifetime of the service
                _times = await GetDimension("time", logFileName, connection => RetrieveTimeDimensions(connection));
            }

            // NOTE(review): Min/Max in the date lambda throw on an empty sequence of non-nullable values,
            // so an empty sourceData presumably fails here - confirm callers never pass an empty collection.
            var packagesTask            = GetDimension("package", logFileName, connection => RetrievePackageDimensions(sourceData, connection));
            var operationsTask          = GetDimension("operation", logFileName, connection => RetrieveOperationDimensions(sourceData, connection));
            var projectTypesTask        = GetDimension("project type", logFileName, connection => RetrieveProjectTypeDimensions(sourceData, connection));
            var clientsTask             = GetDimension("client", logFileName, connection => RetrieveClientDimensions(sourceData, connection));
            var platformsTask           = GetDimension("platform", logFileName, connection => RetrievePlatformDimensions(sourceData, connection));
            var datesTask               = GetDimension("date", logFileName, connection => RetrieveDateDimensions(connection, sourceData.Min(e => e.EdgeServerTimeDelivered), sourceData.Max(e => e.EdgeServerTimeDelivered)));
            var packageTranslationsTask = GetDimension("package translations", logFileName, connection => RetrievePackageTranslations(sourceData, connection));

            await Task.WhenAll(operationsTask, projectTypesTask, clientsTask, platformsTask, datesTask, packagesTask, packageTranslationsTask);

            // Every task has already completed successfully here, so reading Result does not block.
            var operations   = operationsTask.Result;
            var projectTypes = projectTypesTask.Result;
            var clients      = clientsTask.Result;
            var platforms    = platformsTask.Result;

            var dates               = datesTask.Result;
            var packages            = packagesTask.Result;
            var packageTranslations = packageTranslationsTask.Result;

            // create facts data rows by linking source data with dimensions
            var dataImporter = new DataImporter(_targetDatabase);
            var dataTable    = await dataImporter.GetDataTableAsync("Fact_Download");

            Trace.WriteLine("Creating facts...");
            foreach (var groupedByPackageId in sourceData.GroupBy(e => e.PackageId, StringComparer.OrdinalIgnoreCase))
            {
                var packagesForId = packages.Where(e => string.Equals(e.PackageId, groupedByPackageId.Key, StringComparison.OrdinalIgnoreCase)).ToList();

                foreach (var groupedByPackageIdAndVersion in groupedByPackageId.GroupBy(e => e.PackageVersion, StringComparer.OrdinalIgnoreCase))
                {
                    int packageId;
                    var package = packagesForId.FirstOrDefault(e => string.Equals(e.PackageVersion, groupedByPackageIdAndVersion.Key, StringComparison.OrdinalIgnoreCase));
                    if (package == null)
                    {
                        // This package id and version could not be 100% accurately parsed from the CDN Request URL,
                        // likely due to weird package ID which could be interpreted as a version string.
                        // Look for a mapping in the support table in an attempt to auto-correct this entry.
                        var packageTranslation = packageTranslations.FirstOrDefault(t => t.IncorrectPackageId == groupedByPackageId.Key && t.IncorrectPackageVersion == groupedByPackageIdAndVersion.Key);
                        if (packageTranslation != null)
                        {
                            // there seems to be a mapping
                            packageId = packageTranslation.CorrectedPackageId;
                        }
                        else
                        {
                            // Track it in Application Insights.
                            ApplicationInsights.TrackPackageNotFound(groupedByPackageId.Key, groupedByPackageIdAndVersion.Key, logFileName);

                            continue;
                        }
                    }
                    else
                    {
                        packageId = package.Id;
                    }

                    foreach (var element in groupedByPackageIdAndVersion)
                    {
                        // required dimensions
                        var dateId = dates.First(e => e.Date.Equals(element.EdgeServerTimeDelivered.Date)).Id;
                        var timeId = _times.First(e => e.HourOfDay == element.EdgeServerTimeDelivered.Hour).Id;

                        // Dimensions that could be "(unknown)".
                        // These must be resolved for every element: caching the first resolved ID across
                        // iterations would stamp all later rows with the first element's dimensions.
                        // A missing key, a null key or an empty dimension table all fall back to Unknown.
                        int operationId = DimensionId.Unknown;
                        int knownOperationId;
                        if (element.Operation != null && operations.TryGetValue(element.Operation, out knownOperationId))
                        {
                            operationId = knownOperationId;
                        }

                        int platformId = DimensionId.Unknown;
                        int knownPlatformId;
                        if (element.UserAgent != null && platforms.TryGetValue(element.UserAgent, out knownPlatformId))
                        {
                            platformId = knownPlatformId;
                        }

                        int clientId = DimensionId.Unknown;
                        int knownClientId;
                        if (element.UserAgent != null && clients.TryGetValue(element.UserAgent, out knownClientId))
                        {
                            clientId = knownClientId;
                        }

                        // One fact row is written per project type GUID carried by the element.
                        var projectTypeIds = new List<int>();
                        if (!string.IsNullOrEmpty(element.ProjectGuids))
                        {
                            foreach (var projectGuid in element.ProjectGuids.Split(new[] { ";" }, StringSplitOptions.RemoveEmptyEntries))
                            {
                                int knownProjectTypeId;
                                projectTypeIds.Add(projectTypes.TryGetValue(projectGuid, out knownProjectTypeId)
                                    ? knownProjectTypeId
                                    : DimensionId.Unknown);
                            }
                        }

                        if (projectTypeIds.Count == 0)
                        {
                            // No project GUIDs at all: still record the download, with the unknown project type.
                            projectTypeIds.Add(DimensionId.Unknown);
                        }

                        foreach (var projectTypeId in projectTypeIds)
                        {
                            var dataRow = dataTable.NewRow();
                            FillDataRow(dataRow, dateId, timeId, packageId, operationId, platformId, projectTypeId, clientId, logFileName, element.UserAgent);
                            dataTable.Rows.Add(dataRow);
                        }
                    }
                }
            }
            stopwatch.Stop();
            Trace.Write("  DONE (" + dataTable.Rows.Count + " records, " + stopwatch.ElapsedMilliseconds + "ms)");
            ApplicationInsights.TrackMetric("Blob record count", dataTable.Rows.Count, logFileName);

            return dataTable;
        }