/// <summary>
/// Leases the next batch of raw CDN log files and processes each one:
/// parsed statistics go to the warehouse, decompressed logs are archived,
/// and corrupted blobs are dead-lettered.
/// </summary>
public override async Task Run()
{
    // Get the target blob container (for archiving decompressed log files)
    var targetBlobContainer = _cloudBlobClient.GetContainerReference(
        _configuration.AzureCdnCloudStorageContainerName + "-archive");
    await targetBlobContainer.CreateIfNotExistsAsync();

    // Get the dead-letter container (corrupted or failed blobs will end up there)
    var deadLetterBlobContainer = _cloudBlobClient.GetContainerReference(
        _configuration.AzureCdnCloudStorageContainerName + "-deadletter");
    await deadLetterBlobContainer.CreateIfNotExistsAsync();

    // Create a parser
    var warehouse = new Warehouse(
        LoggerFactory,
        OpenSqlConnectionAsync<StatisticsDbConfiguration>,
        _applicationInsightsHelper);
    var statisticsBlobContainerUtility = new StatisticsBlobContainerUtility(
        targetBlobContainer,
        deadLetterBlobContainer,
        LoggerFactory,
        _applicationInsightsHelper);

    var logProcessor = new LogFileProcessor(
        statisticsBlobContainerUtility,
        LoggerFactory,
        warehouse,
        _applicationInsightsHelper);

    // Raw log file names are prefixed "<platformPrefix>_<accountNumber>_";
    // InvariantCulture because this is a machine-readable blob-name prefix.
    var prefix = string.Format(
        CultureInfo.InvariantCulture,
        "{0}_{1}_",
        _azureCdnPlatform.GetRawLogFilePrefix(),
        _configuration.AzureCdnAccountNumber);

    // Get next raw log file to be processed
    IReadOnlyCollection<string> alreadyAggregatedLogFiles = null;
    if (_configuration.AggregatesOnly)
    {
        // We only want to process aggregates for the log files.
        // Get the list of files we already processed so we can skip them.
        alreadyAggregatedLogFiles = await warehouse.GetAlreadyAggregatedLogFilesAsync();
    }

    var leasedLogFiles = await _blobLeaseManager.LeaseNextLogFilesToBeProcessedAsync(prefix, alreadyAggregatedLogFiles);
    foreach (var leasedLogFile in leasedLogFiles)
    {
        // BUGFIX: the original only disposed the lease on the success path,
        // leaking the blob lease when ProcessLogFileAsync threw. The using
        // statement guarantees disposal either way.
        using (leasedLogFile)
        {
            var packageTranslator = new PackageTranslator();
            var packageStatisticsParser = new PackageStatisticsParser(packageTranslator, LoggerFactory);

            await logProcessor.ProcessLogFileAsync(leasedLogFile, packageStatisticsParser, _configuration.AggregatesOnly);

            if (_configuration.AggregatesOnly)
            {
                _blobLeaseManager.TrackLastProcessedBlobUri(leasedLogFile.Uri);
            }
        }
    }
}
/// <summary>
/// Downloads and decompresses the leased log blob, parses each line into a
/// CDN log entry, and collects the package download statistics found.
/// Parsing duration and failures are reported to Application Insights.
/// </summary>
/// <param name="logFile">The leased log blob to parse.</param>
/// <returns>All package statistics extracted from the log file.</returns>
private async Task<IReadOnlyCollection<PackageStatistics>> ParseLogEntries(ILeasedLogFile logFile)
{
    var logStream = await OpenCompressedBlobAsync(logFile);
    var blobUri = logFile.Uri;
    var blobName = logFile.Blob.Name;

    var packageStatistics = new List<PackageStatistics>();
    var stopwatch = Stopwatch.StartNew();

    try
    {
        // parse the log into table entities
        _jobEventSource.BeginningParseLog(blobUri);

        using (var logStreamReader = new StreamReader(logStream))
        {
            // Idiomatic read loop: ReadLine() returns null at end-of-stream,
            // replacing the original do/while + EndOfStream combination.
            string rawLogLine;
            while ((rawLogLine = logStreamReader.ReadLine()) != null)
            {
                // Lines that are not valid CDN log entries are skipped.
                var logEntry = CdnLogEntryParser.ParseLogEntryFromLine(rawLogLine);
                if (logEntry == null)
                {
                    continue;
                }

                var statistic = PackageStatisticsParser.FromCdnLogEntry(logEntry);
                if (statistic != null)
                {
                    packageStatistics.Add(statistic);
                }
            }
        }

        _jobEventSource.FinishingParseLog(blobUri, packageStatistics.Count);

        stopwatch.Stop();
        ApplicationInsights.TrackMetric("Blob parsing duration (ms)", stopwatch.ElapsedMilliseconds, blobName);
    }
    catch (Exception exception)
    {
        if (stopwatch.IsRunning)
        {
            stopwatch.Stop();
        }

        _jobEventSource.FailedParseLog(blobUri);
        ApplicationInsights.TrackException(exception, blobName);
        throw; // rethrow without resetting the stack trace
    }
    finally
    {
        logStream.Dispose();
    }

    return packageStatistics;
}
/// <summary>
/// Leases the next batch of raw CDN log files and processes each one.
/// Returns <c>true</c> on success; logs a critical event and returns
/// <c>false</c> if any part of the run throws.
/// </summary>
public override async Task<bool> Run()
{
    try
    {
        // Get the target blob container (for archiving decompressed log files)
        var targetBlobContainer = _cloudBlobClient.GetContainerReference(_cloudStorageContainerName + "-archive");
        await targetBlobContainer.CreateIfNotExistsAsync();

        // Get the dead-letter container (corrupted or failed blobs will end up there)
        var deadLetterBlobContainer = _cloudBlobClient.GetContainerReference(_cloudStorageContainerName + "-deadletter");
        await deadLetterBlobContainer.CreateIfNotExistsAsync();

        // Create a parser
        var warehouse = new Warehouse(_loggerFactory, _targetDatabase);
        var statisticsBlobContainerUtility = new StatisticsBlobContainerUtility(
            targetBlobContainer,
            deadLetterBlobContainer,
            _loggerFactory);

        var logProcessor = new LogFileProcessor(statisticsBlobContainerUtility, _loggerFactory, warehouse);

        // Raw log file names are prefixed "<platformPrefix>_<accountNumber>_";
        // InvariantCulture because this is a machine-readable blob-name prefix.
        var prefix = string.Format(
            CultureInfo.InvariantCulture,
            "{0}_{1}_",
            _azureCdnPlatform.GetRawLogFilePrefix(),
            _azureCdnAccountNumber);

        // Get next raw log file to be processed
        IReadOnlyCollection<string> alreadyAggregatedLogFiles = null;
        if (_aggregatesOnly)
        {
            // We only want to process aggregates for the log files.
            // Get the list of files we already processed so we can skip them.
            alreadyAggregatedLogFiles = await warehouse.GetAlreadyAggregatedLogFilesAsync();
        }

        var leasedLogFiles = await _blobLeaseManager.LeaseNextLogFilesToBeProcessedAsync(prefix, alreadyAggregatedLogFiles);
        foreach (var leasedLogFile in leasedLogFiles)
        {
            // BUGFIX: the original only disposed the lease on the success path,
            // leaking the blob lease when ProcessLogFileAsync threw. The using
            // statement guarantees disposal either way.
            using (leasedLogFile)
            {
                var packageTranslator = new PackageTranslator("packagetranslations.json");
                var packageStatisticsParser = new PackageStatisticsParser(packageTranslator);

                await logProcessor.ProcessLogFileAsync(leasedLogFile, packageStatisticsParser, _aggregatesOnly);

                if (_aggregatesOnly)
                {
                    _blobLeaseManager.TrackLastProcessedBlobUri(leasedLogFile.Uri);
                }
            }
        }
    }
    catch (Exception exception)
    {
        _logger.LogCritical(LogEvents.JobRunFailed, exception, "Job run failed!");
        return false;
    }

    return true;
}
/// <summary>
/// Downloads and decompresses the leased log blob and classifies each log
/// entry as a package download, a tool download (dist.nuget.org), or a DNX
/// download (dist.asp.net). Parsing duration and failures are reported to
/// Application Insights.
/// </summary>
/// <param name="logFile">The leased log blob to parse.</param>
/// <returns>The aggregated package, tool, and DNX statistics.</returns>
private async Task<CdnStatistics> ParseLogEntries(ILeasedLogFile logFile)
{
    var logStream = await OpenCompressedBlobAsync(logFile);
    var blobUri = logFile.Uri;
    var blobName = logFile.Blob.Name;

    var packageStatistics = new List<PackageStatistics>();
    var toolStatistics = new List<ToolStatistics>();
    var dnxStatistics = new List<DnxStatistics>();
    var stopwatch = Stopwatch.StartNew();

    try
    {
        // parse the log into table entities
        _jobEventSource.BeginningParseLog(blobUri);

        using (var logStreamReader = new StreamReader(logStream))
        {
            // Idiomatic read loop: ReadLine() returns null at end-of-stream,
            // replacing the original do/while + EndOfStream combination.
            string rawLogLine;
            while ((rawLogLine = logStreamReader.ReadLine()) != null)
            {
                // Lines that are not valid CDN log entries are skipped.
                var logEntry = CdnLogEntryParser.ParseLogEntryFromLine(rawLogLine);
                if (logEntry == null)
                {
                    continue;
                }

                var statistic = PackageStatisticsParser.FromCdnLogEntry(logEntry);
                if (statistic != null)
                {
                    packageStatistics.Add(statistic);
                    continue;
                }

                // Not a package download: check if this is a dist.nuget.org download
                if (logEntry.RequestUrl.Contains("dist.nuget.org/"))
                {
                    var toolInfo = ToolStatisticsParser.FromCdnLogEntry(logEntry);
                    if (toolInfo != null)
                    {
                        toolStatistics.Add(toolInfo);
                    }
                }

                // ...or a dist.asp.net (DNX) download
                if (logEntry.RequestUrl.Contains("dist.asp.net"))
                {
                    var dnxInfo = DnxStatisticsParser.FromCdnLogEntry(logEntry);
                    if (dnxInfo != null)
                    {
                        dnxStatistics.Add(dnxInfo);
                    }
                }
            }
        }

        _jobEventSource.FinishingParseLog(blobUri, packageStatistics.Count);

        stopwatch.Stop();
        ApplicationInsights.TrackMetric("Blob parsing duration (ms)", stopwatch.ElapsedMilliseconds, blobName);
    }
    catch (Exception exception)
    {
        if (stopwatch.IsRunning)
        {
            stopwatch.Stop();
        }

        _jobEventSource.FailedParseLog(blobUri);
        ApplicationInsights.TrackException(exception, blobName);
        throw; // rethrow without resetting the stack trace
    }
    finally
    {
        logStream.Dispose();
    }

    return new CdnStatistics(packageStatistics, toolStatistics, dnxStatistics);
}