/// <summary>
/// Copies the leased log blob to the archive container, recording the upload
/// duration as an Application Insights metric. On failure, logs the event and
/// the exception, then rethrows.
/// </summary>
private async Task ArchiveBlobAsync(ILeasedLogFile logFile)
{
    var timer = Stopwatch.StartNew();
    try
    {
        await EnsureCopiedToContainerAsync(logFile, _targetContainer);

        _jobEventSource.FinishingArchiveUpload(logFile.Uri);

        timer.Stop();
        ApplicationInsights.TrackMetric("Blob archiving duration (ms)", timer.ElapsedMilliseconds, logFile.Blob.Name);
    }
    catch (Exception exception)
    {
        // Stop the timer if the failure happened before the success path stopped it.
        if (timer.IsRunning)
        {
            timer.Stop();
        }

        _jobEventSource.FailedArchiveUpload(logFile.Uri);
        ApplicationInsights.TrackException(exception, logFile.Blob.Name);

        throw;
    }
}
/// <summary>
/// Downloads and decompresses the leased log blob, parses each CDN log line,
/// and converts the parseable entries into <see cref="PackageStatistics"/> rows.
/// Parsing duration is tracked as a metric; failures are logged and rethrown.
/// </summary>
private async Task<IReadOnlyCollection<PackageStatistics>> ParseLogEntries(ILeasedLogFile logFile)
{
    var logStream = await OpenCompressedBlobAsync(logFile);
    var blobUri = logFile.Uri;
    var blobName = logFile.Blob.Name;

    var statistics = new List<PackageStatistics>();
    var timer = Stopwatch.StartNew();
    try
    {
        // parse the log into table entities
        _jobEventSource.BeginningParseLog(blobUri);

        using (var reader = new StreamReader(logStream))
        {
            string rawLogLine;
            while ((rawLogLine = reader.ReadLine()) != null)
            {
                var logEntry = CdnLogEntryParser.ParseLogEntryFromLine(rawLogLine);
                if (logEntry == null)
                {
                    continue;
                }

                var statistic = PackageStatisticsParser.FromCdnLogEntry(logEntry);
                if (statistic != null)
                {
                    statistics.Add(statistic);
                }
            }
        }

        _jobEventSource.FinishingParseLog(blobUri, statistics.Count);

        timer.Stop();
        ApplicationInsights.TrackMetric("Blob parsing duration (ms)", timer.ElapsedMilliseconds, blobName);
    }
    catch (Exception exception)
    {
        if (timer.IsRunning)
        {
            timer.Stop();
        }

        _jobEventSource.FailedParseLog(blobUri);
        ApplicationInsights.TrackException(exception, blobName);

        throw;
    }
    finally
    {
        logStream.Dispose();
    }

    return statistics;
}
/// <summary>
/// Generates the three statistics reports (downloads.v1.json, stats-totals.json,
/// tools.v1.json) in sequence, tracking each report's generation time as a metric.
/// Returns true on success; logs and tracks any exception and returns false.
/// </summary>
public override async Task<bool> Run()
{
    try
    {
        var stopwatch = Stopwatch.StartNew();

        // build downloads.v1.json
        var targets = new List<StorageContainerTarget>();
        targets.Add(new StorageContainerTarget(_cloudStorageAccount, _statisticsContainerName));
        foreach (var dataContainerName in _dataContainerNames)
        {
            targets.Add(new StorageContainerTarget(_dataStorageAccount, dataContainerName));
        }
        var downloadCountReport = new DownloadCountReport(targets, _statisticsDatabase, _galleryDatabase);
        await downloadCountReport.Run();
        stopwatch.Stop();
        ApplicationInsights.TrackMetric(DownloadCountReport.ReportName + " Generation Time (ms)", stopwatch.ElapsedMilliseconds);
        ApplicationInsights.TrackReportProcessed(DownloadCountReport.ReportName);
        stopwatch.Restart();

        // build stats-totals.json
        var galleryTotalsReport = new GalleryTotalsReport(_cloudStorageAccount, _statisticsContainerName, _statisticsDatabase, _galleryDatabase);
        await galleryTotalsReport.Run();
        stopwatch.Stop();
        ApplicationInsights.TrackMetric(GalleryTotalsReport.ReportName + " Generation Time (ms)", stopwatch.ElapsedMilliseconds);
        ApplicationInsights.TrackReportProcessed(GalleryTotalsReport.ReportName);

        // FIX: restart the stopwatch before timing the next report. Previously the
        // restart was missing here, so the tools.v1.json metric also included the
        // stats-totals.json generation time.
        stopwatch.Restart();

        // build tools.v1.json
        var toolsReport = new DownloadsPerToolVersionReport(_cloudStorageAccount, _statisticsContainerName, _statisticsDatabase, _galleryDatabase);
        await toolsReport.Run();
        stopwatch.Stop();
        ApplicationInsights.TrackMetric(DownloadsPerToolVersionReport.ReportName + " Generation Time (ms)", stopwatch.ElapsedMilliseconds);
        ApplicationInsights.TrackReportProcessed(DownloadsPerToolVersionReport.ReportName);
        // (removed a dead trailing stopwatch.Restart() — nothing was timed after it)

        return true;
    }
    catch (Exception exception)
    {
        Trace.TraceError(exception.ToString());
        ApplicationInsights.TrackException(exception);
        return false;
    }
}
/// <summary>
/// Reads the leased log blob fully into memory (the rolling log files are small)
/// and returns a readable stream: a GZip-decompressing wrapper when the content
/// is gzip-compressed, otherwise the raw buffered stream. Failures are logged
/// with the blob name and rethrown.
/// </summary>
private async Task<Stream> OpenCompressedBlobAsync(ILeasedLogFile logFile)
{
    var timer = Stopwatch.StartNew();
    try
    {
        _jobEventSource.BeginningOpenCompressedBlob(logFile.Uri);

        // decompress into memory (these are rolling log files and relatively small)
        var buffered = new MemoryStream();
        using (var blobStream = await logFile.Blob.OpenReadAsync(AccessCondition.GenerateLeaseCondition(logFile.LeaseId), null, null))
        {
            await blobStream.CopyToAsync(buffered);
            buffered.Position = 0;
        }

        timer.Stop();
        _jobEventSource.FinishedOpenCompressedBlob(logFile.Uri);
        ApplicationInsights.TrackMetric("Open compressed blob duration (ms)", timer.ElapsedMilliseconds, logFile.Blob.Name);

        // verify if the stream is gzipped or not
        var isGzipped = await IsGzipCompressed(buffered);
        return isGzipped
            ? (Stream)new GZipInputStream(buffered)
            : buffered;
    }
    catch (Exception exception)
    {
        if (timer.IsRunning)
        {
            timer.Stop();
        }

        _jobEventSource.FailedOpenCompressedBlob(logFile.Uri);
        ApplicationInsights.TrackException(exception, logFile.Blob.Name);

        throw;
    }
}
/// <summary>
/// Bulk-inserts the prepared download facts into the target database inside a
/// serializable transaction. The transaction is committed on success and rolled
/// back (with the exception tracked against the log file name) on failure.
/// </summary>
internal async Task InsertDownloadFactsAsync(DataTable downloadFacts, string logFileName)
{
    Trace.WriteLine("Inserting into facts table...");
    var timer = Stopwatch.StartNew();

    using (var connection = await _targetDatabase.ConnectTo())
    using (var transaction = connection.BeginTransaction(IsolationLevel.Serializable))
    {
        var bulkCopy = new SqlBulkCopy(connection, SqlBulkCopyOptions.Default, transaction)
        {
            DestinationTableName = downloadFacts.TableName,
            BulkCopyTimeout = _defaultCommandTimeout
        };

        try
        {
            await bulkCopy.WriteToServerAsync(downloadFacts);
            transaction.Commit();

            timer.Stop();
            ApplicationInsights.TrackMetric("Insert facts duration (ms)", timer.ElapsedMilliseconds, logFileName);
        }
        catch (Exception exception)
        {
            if (timer.IsRunning)
            {
                timer.Stop();
            }

            ApplicationInsights.TrackException(exception, logFileName);
            transaction.Rollback();
            throw;
        }
    }

    Trace.Write("  DONE");
}
/// <summary>
/// Builds the "Fact_Download" DataTable for a parsed log file: first resolves all
/// dimension IDs (time, package, operation, project type, client, platform, date,
/// package translations) in parallel against the database, then joins each source
/// statistic to its dimension IDs and emits one fact row per download (or one per
/// project GUID when project-type rows are expanded).
/// </summary>
/// <param name="sourceData">Parsed package download statistics for one log file; must be non-empty (Min/Max over EdgeServerTimeDelivered would throw on an empty collection).</param>
/// <param name="logFileName">Log file name, used for dimension retrieval logging and telemetry.</param>
/// <returns>The populated Fact_Download DataTable ready for bulk insert.</returns>
public async Task <DataTable> CreateAsync(IReadOnlyCollection <PackageStatistics> sourceData, string logFileName)
{
    var stopwatch = Stopwatch.StartNew();

    // insert any new dimension data first
    if (_times == null)
    {
        // this call is only needed once in the lifetime of the service
        // (the time dimension — hour of day — never changes between log files)
        _times = await GetDimension("time", logFileName, connection => RetrieveTimeDimensions(connection));
    }

    // Kick off all remaining dimension lookups concurrently; each one opens its
    // own connection inside GetDimension.
    var packagesTask = GetDimension("package", logFileName, connection => RetrievePackageDimensions(sourceData, connection));
    var operationsTask = GetDimension("operation", logFileName, connection => RetrieveOperationDimensions(sourceData, connection));
    var projectTypesTask = GetDimension("project type", logFileName, connection => RetrieveProjectTypeDimensions(sourceData, connection));
    var clientsTask = GetDimension("client", logFileName, connection => RetrieveClientDimensions(sourceData, connection));
    var platformsTask = GetDimension("platform", logFileName, connection => RetrievePlatformDimensions(sourceData, connection));
    var datesTask = GetDimension("date", logFileName, connection => RetrieveDateDimensions(connection, sourceData.Min(e => e.EdgeServerTimeDelivered), sourceData.Max(e => e.EdgeServerTimeDelivered)));
    var packageTranslationsTask = GetDimension("package translations", logFileName, connection => RetrievePackageTranslations(sourceData, connection));

    await Task.WhenAll(operationsTask, projectTypesTask, clientsTask, platformsTask, datesTask, packagesTask, packageTranslationsTask);

    // Safe to read .Result here: all tasks completed in the WhenAll above.
    var operations = operationsTask.Result;
    var projectTypes = projectTypesTask.Result;
    var clients = clientsTask.Result;
    var platforms = platformsTask.Result;
    var dates = datesTask.Result;
    var packages = packagesTask.Result;
    var packageTranslations = packageTranslationsTask.Result;

    // create facts data rows by linking source data with dimensions
    var dataImporter = new DataImporter(_targetDatabase);
    var dataTable = await dataImporter.GetDataTableAsync("Fact_Download");

    // ensure all dimension IDs are set to the Unknown equivalent if no dimension data is available
    // NOTE(review): these IDs are declared OUTSIDE the element loop below. Once a
    // value is resolved for the first element, it is reused for every subsequent
    // element in this file (the "!x.HasValue" guards never fire again) — confirm
    // this carry-over is intended and not a per-element lookup that was hoisted by mistake.
    int? operationId = !operations.Any() ? DimensionId.Unknown : (int?)null;
    int? projectTypeId = !projectTypes.Any() ? DimensionId.Unknown : (int?)null;
    int? clientId = !clients.Any() ? DimensionId.Unknown : (int?)null;
    int? platformId = !platforms.Any() ? DimensionId.Unknown : (int?)null;

    Trace.WriteLine("Creating facts...");
    foreach (var groupedByPackageId in sourceData.GroupBy(e => e.PackageId, StringComparer.OrdinalIgnoreCase))
    {
        var packagesForId = packages.Where(e => string.Equals(e.PackageId, groupedByPackageId.Key, StringComparison.OrdinalIgnoreCase)).ToList();

        foreach (var groupedByPackageIdAndVersion in groupedByPackageId.GroupBy(e => e.PackageVersion, StringComparer.OrdinalIgnoreCase))
        {
            int packageId;
            var package = packagesForId.FirstOrDefault(e => string.Equals(e.PackageVersion, groupedByPackageIdAndVersion.Key, StringComparison.OrdinalIgnoreCase));
            if (package == null)
            {
                // This package id and version could not be 100% accurately parsed from the CDN Request URL,
                // likely due to weird package ID which could be interpreted as a version string.
                // Look for a mapping in the support table in an attempt to auto-correct this entry.
                var packageTranslation = packageTranslations.FirstOrDefault(t => t.IncorrectPackageId == groupedByPackageId.Key && t.IncorrectPackageVersion == groupedByPackageIdAndVersion.Key);
                if (packageTranslation != null)
                {
                    // there seems to be a mapping
                    packageId = packageTranslation.CorrectedPackageId;
                }
                else
                {
                    // Track it in Application Insights and skip this id+version group entirely
                    // (no fact rows are produced for it).
                    ApplicationInsights.TrackPackageNotFound(groupedByPackageId.Key, groupedByPackageIdAndVersion.Key, logFileName);
                    continue;
                }
            }
            else
            {
                packageId = package.Id;
            }

            foreach (var element in groupedByPackageIdAndVersion)
            {
                // required dimensions — First() throws if the date/hour is missing,
                // which should be impossible since the date dimension was retrieved
                // over this file's own Min/Max delivery timestamps.
                var dateId = dates.First(e => e.Date.Equals(element.EdgeServerTimeDelivered.Date)).Id;
                var timeId = _times.First(e => e.HourOfDay == element.EdgeServerTimeDelivered.Hour).Id;

                // dimensions that could be "(unknown)"
                if (!operationId.HasValue)
                {
                    if (!operations.ContainsKey(element.Operation))
                    {
                        operationId = DimensionId.Unknown;
                    }
                    else
                    {
                        operationId = operations[element.Operation];
                    }
                }
                if (!platformId.HasValue)
                {
                    if (!platforms.ContainsKey(element.UserAgent))
                    {
                        platformId = DimensionId.Unknown;
                    }
                    else
                    {
                        platformId = platforms[element.UserAgent];
                    }
                }
                if (!clientId.HasValue)
                {
                    if (!clients.ContainsKey(element.UserAgent))
                    {
                        clientId = DimensionId.Unknown;
                    }
                    else
                    {
                        clientId = clients[element.UserAgent];
                    }
                }

                if (!projectTypeId.HasValue)
                {
                    // foreach project type: emit one fact row per project GUID on the element.
                    // NOTE(review): unlike the other dimensions above, this uses the indexer
                    // directly with no ContainsKey guard — an unrecognized project GUID would
                    // throw KeyNotFoundException. Confirm whether a DimensionId.Unknown
                    // fallback (matching the other dimensions) was intended here.
                    foreach (var projectGuid in element.ProjectGuids.Split(new[] { ";" }, StringSplitOptions.RemoveEmptyEntries))
                    {
                        projectTypeId = projectTypes[projectGuid];

                        var dataRow = dataTable.NewRow();
                        FillDataRow(dataRow, dateId, timeId, packageId, operationId.Value, platformId.Value, projectTypeId.Value, clientId.Value, logFileName, element.UserAgent);
                        dataTable.Rows.Add(dataRow);
                    }
                }
                else
                {
                    var dataRow = dataTable.NewRow();
                    FillDataRow(dataRow, dateId, timeId, packageId, operationId.Value, platformId.Value, projectTypeId.Value, clientId.Value, logFileName, element.UserAgent);
                    dataTable.Rows.Add(dataRow);
                }
            }
        }
    }
    stopwatch.Stop();
    Trace.Write("  DONE (" + dataTable.Rows.Count + " records, " + stopwatch.ElapsedMilliseconds + "ms)");
    ApplicationInsights.TrackMetric("Blob record count", dataTable.Rows.Count, logFileName);

    return dataTable;
}
/// <summary>
/// Downloads and decompresses the leased log blob and parses every line,
/// classifying each entry as a package download, a dist.nuget.org tool
/// download, or a dist.asp.net DNX download. Returns the three collections
/// bundled in a <see cref="CdnStatistics"/>. Failures are logged and rethrown.
/// </summary>
private async Task<CdnStatistics> ParseLogEntries(ILeasedLogFile logFile)
{
    var logStream = await OpenCompressedBlobAsync(logFile);
    var blobUri = logFile.Uri;
    var blobName = logFile.Blob.Name;

    var packageStatistics = new List<PackageStatistics>();
    var toolStatistics = new List<ToolStatistics>();
    var dnxStatistics = new List<DnxStatistics>();

    var timer = Stopwatch.StartNew();
    try
    {
        // parse the log into table entities
        _jobEventSource.BeginningParseLog(blobUri);

        using (var reader = new StreamReader(logStream))
        {
            string rawLogLine;
            while ((rawLogLine = reader.ReadLine()) != null)
            {
                var logEntry = CdnLogEntryParser.ParseLogEntryFromLine(rawLogLine);
                if (logEntry == null)
                {
                    continue;
                }

                var statistic = PackageStatisticsParser.FromCdnLogEntry(logEntry);
                if (statistic != null)
                {
                    packageStatistics.Add(statistic);
                    continue;
                }

                // not a package download — check if this is a dist.nuget.org download
                if (logEntry.RequestUrl.Contains("dist.nuget.org/"))
                {
                    var toolInfo = ToolStatisticsParser.FromCdnLogEntry(logEntry);
                    if (toolInfo != null)
                    {
                        toolStatistics.Add(toolInfo);
                    }
                }

                // ...or a dist.asp.net (DNX) download
                if (logEntry.RequestUrl.Contains("dist.asp.net"))
                {
                    var dnxInfo = DnxStatisticsParser.FromCdnLogEntry(logEntry);
                    if (dnxInfo != null)
                    {
                        dnxStatistics.Add(dnxInfo);
                    }
                }
            }
        }

        _jobEventSource.FinishingParseLog(blobUri, packageStatistics.Count);

        timer.Stop();
        ApplicationInsights.TrackMetric("Blob parsing duration (ms)", timer.ElapsedMilliseconds, blobName);
    }
    catch (Exception exception)
    {
        if (timer.IsRunning)
        {
            timer.Stop();
        }

        _jobEventSource.FailedParseLog(blobUri);
        ApplicationInsights.TrackException(exception, blobName);

        throw;
    }
    finally
    {
        logStream.Dispose();
    }

    return new CdnStatistics(packageStatistics, toolStatistics, dnxStatistics);
}