/// <summary>
/// Generates the download-count report: reads per-package-version download
/// totals from the statistics warehouse and publishes them as a public JSON
/// blob in the configured destination container.
/// </summary>
/// <returns>true when the report was written (or there was no data); false on any error.</returns>
public override async Task<bool> Run()
{
    try
    {
        // Blob client for the configured storage account, with exponential retry
        // (10 s base delay, 5 attempts) on transient storage failures.
        var blobClient = _cloudStorageAccount.CreateCloudBlobClient();
        blobClient.DefaultRequestOptions.RetryPolicy = new ExponentialRetry(TimeSpan.FromSeconds(10), 5);

        // Ensure the destination container exists and allows anonymous blob reads.
        var reportContainer = blobClient.GetContainerReference(_destinationContainerName);
        await reportContainer.CreateIfNotExistsAsync();
        var permissions = new BlobContainerPermissions
        {
            PublicAccess = BlobContainerPublicAccessType.Blob
        };
        await reportContainer.SetPermissionsAsync(permissions);

        Trace.TraceInformation("Generating Download Count Report from {0}/{1} to {2}/{3}.", _sourceDatabase.DataSource, _sourceDatabase.InitialCatalog, _cloudStorageAccount.Credentials.AccountName, _destinationContainerName);

        // Pull the raw download counts from the statistics warehouse.
        IReadOnlyCollection<DownloadCountData> downloadData;
        Trace.TraceInformation("Gathering Download Counts from {0}/{1}...", _sourceDatabase.DataSource, _sourceDatabase.InitialCatalog);
        using (var connection = await _sourceDatabase.ConnectTo())
        {
            downloadData = (await connection.QueryWithRetryAsync<DownloadCountData>(_storedProcedureName, commandType: CommandType.StoredProcedure)).ToList();
        }

        Trace.TraceInformation("Gathered {0} rows of data.", downloadData.Count);

        if (downloadData.Any())
        {
            // Report shape: [ [ "PackageId", [ "version", count ], ... ], ... ]
            var registrations = new JArray();
            foreach (var packageGroup in downloadData.GroupBy(p => p.PackageId))
            {
                var entry = new JArray { packageGroup.Key };
                foreach (var versionRow in packageGroup)
                {
                    entry.Add(new JArray(versionRow.PackageVersion, versionRow.TotalDownloadCount));
                }
                registrations.Add(entry);
            }

            var reportBlob = reportContainer.GetBlockBlobReference(_reportName);
            Trace.TraceInformation("Writing report to {0}", reportBlob.Uri.AbsoluteUri);
            reportBlob.Properties.ContentType = "application/json";
            await reportBlob.UploadTextAsync(registrations.ToString(Formatting.None));
            Trace.TraceInformation("Wrote report to {0}", reportBlob.Uri.AbsoluteUri);
        }

        return true;
    }
    catch (Exception exception)
    {
        Trace.TraceError(exception.ToString());
        return false;
    }
}
/// <summary>
/// Runs the [dbo].[RollUpDownloadFacts] stored procedure against the target
/// database to roll up old per-download fact rows.
/// </summary>
/// <returns>true on success; false when the procedure failed (failure is logged).</returns>
public override async Task<bool> Run()
{
    try
    {
        using (var connection = await _targetDatabase.ConnectTo())
        {
            // Remove before adding so a reused handler is never subscribed twice.
            connection.InfoMessage -= OnSqlConnectionInfoMessage;
            connection.InfoMessage += OnSqlConnectionInfoMessage;

            // Dispose the command deterministically.
            using (var sqlCommand = new SqlCommand("[dbo].[RollUpDownloadFacts]", connection))
            {
                sqlCommand.CommandType = CommandType.StoredProcedure;
                sqlCommand.CommandTimeout = 23 * 60 * 60; // the roll-up can run for most of a day
                sqlCommand.Parameters.Add(new SqlParameter("MinAgeInDays", _minAgeInDays));

                // The procedure produces no result set; ExecuteNonQueryAsync is
                // the correct call (the original used ExecuteScalarAsync).
                await sqlCommand.ExecuteNonQueryAsync();
            }
        }
        return true;
    }
    catch (Exception exception)
    {
        // Pass the exception object itself so the logging provider captures
        // the full exception/stack trace, not just its string rendering.
        _logger.LogCritical(exception, "Job run failed.");
        return false;
    }
}
/// <summary>
/// Builds an empty, insert-ready <see cref="DataTable"/> whose schema matches
/// the given database table, minus the database-generated Id and Timestamp columns.
/// </summary>
/// <param name="tableName">Unqualified table name substituted into the SELECT TOP 1 template.</param>
public async Task<DataTable> GetDataTableAsync(string tableName)
{
    // Pull a single row so the adapter can infer the table schema.
    var schemaTable = new DataTable();
    var selectQuery = string.Format(_sqlSelectTop1FromTable, tableName);
    using (var connection = await _targetDatabase.ConnectTo())
    {
        var adapter = new SqlDataAdapter(selectQuery, connection)
        {
            MissingSchemaAction = MissingSchemaAction.Add
        };
        adapter.Fill(schemaTable);
    }

    // Only the schema is wanted; discard the sampled row.
    schemaTable.Rows.Clear();

    // Remove Id column from in-memory data table.
    // These are auto-generated on the database upon insert.
    if (schemaTable.Columns.Contains("Id"))
    {
        // Clear the key first, otherwise removing a key column throws.
        schemaTable.PrimaryKey = null;
        schemaTable.Columns.Remove("Id");
    }

    // Remove Timestamp column from in-memory data table.
    // These are auto-generated on the database upon insert.
    if (schemaTable.Columns.Contains("Timestamp"))
    {
        schemaTable.Columns.Remove("Timestamp");
    }

    schemaTable.TableName = $"dbo.{tableName}";
    return schemaTable;
}
/// <summary>
/// Opens a connection to the source database with the info-message handler
/// attached. The caller owns — and must dispose — the returned connection.
/// </summary>
internal async Task<SqlConnection> OpenConnectionAsync()
{
    var sourceConnection = await _sourceDatabase.ConnectTo();
    sourceConnection.InfoMessage += OnSqlConnectionInfoMessage;
    return sourceConnection;
}
/// <summary>
/// Returns the highest [Key] in the source PackageStatistics table,
/// or -1 when the table is empty.
/// </summary>
public static async Task<int> GetMaxSourceKey(SqlConnectionStringBuilder connectionString)
{
    using (var connection = await connectionString.ConnectTo())
    // Dispose the command deterministically (the original leaked it to the GC).
    using (var command = new SqlCommand("SELECT MAX([Key]) AS MaxOriginalKey FROM PackageStatistics", connection))
    {
        // MAX() over an empty table yields DBNull, which the as-cast maps to null.
        int? maxOriginalKey = await command.ExecuteScalarAsync() as int?;
        return maxOriginalKey ?? -1;
    }
}
/// <summary>
/// Reads the next batch of download facts from the source database as an XML
/// document (FOR XML RAW), bounded by the source marker's key range, the
/// target marker's timestamp window, and the current cursor position.
/// </summary>
/// <returns>The facts document, or null when no rows matched.</returns>
private static async Task<XDocument> GetDownloadRecords(SqlConnectionStringBuilder source, ReplicationSourceMarker sourceMarker, ReplicationTargetMarker targetMarker, int batchSize)
{
    using (var connection = await source.ConnectTo())
    {
        using (var command = new SqlCommand(@"
            SELECT TOP(@batchSize)
                PackageStatistics.[Key] 'originalKey',
                PackageRegistrations.[Id] 'packageId',
                Packages.[Version] 'packageVersion',
                Packages.[Listed] 'packageListed',
                Packages.[Title] 'packageTitle',
                Packages.[Description] 'packageDescription',
                Packages.[IconUrl] 'packageIconUrl',
                ISNULL(PackageStatistics.[UserAgent], '') 'downloadUserAgent',
                ISNULL(PackageStatistics.[Operation], '') 'downloadOperation',
                PackageStatistics.[Timestamp] 'downloadTimestamp',
                PackageStatistics.[ProjectGuids] 'downloadProjectTypes',
                PackageStatistics.[DependentPackage] 'downloadDependentPackageId'
            FROM PackageStatistics
            INNER JOIN Packages ON PackageStatistics.PackageKey = Packages.[Key]
            INNER JOIN PackageRegistrations ON PackageRegistrations.[Key] = Packages.PackageRegistrationKey
            WHERE PackageStatistics.[Key] >= @minSourceKey
            AND PackageStatistics.[Key] <= @maxSourceKey
            AND PackageStatistics.[Timestamp] >= @minTimestamp
            AND PackageStatistics.[Timestamp] < @maxTimestamp
            AND PackageStatistics.[Key] > @cursor
            ORDER BY PackageStatistics.[Key]
            FOR XML RAW('fact'), ELEMENTS, ROOT('facts')
            ", connection))
        {
            command.Parameters.AddWithValue("@batchSize", batchSize);
            command.Parameters.AddWithValue("@minSourceKey", sourceMarker.MinKey);
            command.Parameters.AddWithValue("@maxSourceKey", sourceMarker.MaxKey);
            // A null cursor means "start from the beginning" — key 0.
            command.Parameters.AddWithValue("@cursor", targetMarker.Cursor ?? 0);
            command.Parameters.AddWithValue("@minTimestamp", targetMarker.MinTimestamp);
            command.Parameters.AddWithValue("@maxTimestamp", targetMarker.MaxTimestamp);

            // Dispose the XmlReader so the command is not left with a pending
            // reader on the connection (the original leaked it).
            using (var factsReader = await command.ExecuteXmlReaderAsync())
            {
                var nodeType = factsReader.MoveToContent();
                if (nodeType != XmlNodeType.None)
                {
                    return XDocument.Load(factsReader);
                }

                // No data returned
                return null;
            }
        }
    }
}
/// <summary>
/// Advances the dirty-package-id cursor in the source database to the given position.
/// </summary>
public static async Task UpdateDirtyPackageIdCursor(SqlConnectionStringBuilder sourceDatabase, DateTime runToCursor)
{
    using (var connection = await sourceDatabase.ConnectTo())
    // Dispose the command deterministically (the original leaked it to the GC).
    using (var command = new SqlCommand("[dbo].[UpdateDirtyPackageIdCursor]", connection))
    {
        command.CommandType = CommandType.StoredProcedure;
        command.CommandTimeout = _commandTimeout;
        command.Parameters.Add("@Position", SqlDbType.DateTime).Value = runToCursor;

        await command.ExecuteNonQueryAsync();
    }
}
/// <summary>
/// Computes the replication window on the source: min/max [Key], min/max
/// [Timestamp], and the record count for rows inside the optional timestamp
/// bounds. Open-ended bounds collapse to SQL datetime min/max.
/// </summary>
private static async Task<ReplicationSourceMarker> GetReplicationSourceMarker(SqlConnectionStringBuilder source, DateTime? minTimestamp, DateTime? maxTimestamp)
{
    string sql = @"
        SELECT MIN([Key]) AS MinKey
             , MAX([Key]) AS MaxKey
             , MIN([Timestamp]) AS MinTimestamp
             , MAX([Timestamp]) AS MaxTimestamp
             , COUNT(*) AS Records
        FROM PackageStatistics
        WHERE [Timestamp] >= @minTimestamp
        AND [Timestamp] < @maxTimestamp";

    using (var connection = await source.ConnectTo())
    // Dispose the command deterministically (the original leaked it to the GC).
    using (var command = new SqlCommand(sql, connection))
    {
        command.Parameters.AddWithValue("@minTimestamp", minTimestamp ?? System.Data.SqlTypes.SqlDateTime.MinValue);
        command.Parameters.AddWithValue("@maxTimestamp", maxTimestamp ?? System.Data.SqlTypes.SqlDateTime.MaxValue);

        using (var result = await command.ExecuteReaderAsync(CommandBehavior.SingleResult | CommandBehavior.SingleRow | CommandBehavior.KeyInfo))
        {
            if (result.HasRows && result.Read())
            {
                if (!result.IsDBNull(result.GetOrdinal("MinKey")) && !result.IsDBNull(result.GetOrdinal("MaxKey")))
                {
                    return new ReplicationSourceMarker
                    {
                        MinKey = result.GetInt32(result.GetOrdinal("MinKey")),
                        MaxKey = result.GetInt32(result.GetOrdinal("MaxKey")),
                        // Keep the original timestamp min/max values if specified
                        // Otherwise we lose the boundary values and might not process the window (thinking it's incomplete)
                        MinTimestamp = minTimestamp ?? result.GetDateTime(result.GetOrdinal("MinTimestamp")),
                        MaxTimestamp = maxTimestamp ?? result.GetDateTime(result.GetOrdinal("MaxTimestamp")),
                        RecordsToReplicate = result.GetInt32(result.GetOrdinal("Records"))
                    };
                }
                else
                {
                    // The window matched rows but MIN/MAX came back NULL: nothing to replicate.
                    return new ReplicationSourceMarker
                    {
                        RecordsToReplicate = 0
                    };
                }
            }
        }
    }

    // No row at all — return an empty marker.
    return new ReplicationSourceMarker
    {
        MinKey = 0,
        MaxKey = 0
    };
}
/// <summary>
/// Fetches the replication cursor from the target database for the marker's
/// timestamp window; null when no cursor exists yet (DBNull from the proc).
/// </summary>
private static async Task<int?> GetTargetCursor(SqlConnectionStringBuilder target, ReplicationTargetMarker targetMarker)
{
    using (var connection = await target.ConnectTo())
    using (var command = new SqlCommand("GetCursor", connection) { CommandType = CommandType.StoredProcedure })
    {
        command.Parameters.AddWithValue("@minTimestamp", targetMarker.MinTimestamp);
        command.Parameters.AddWithValue("@maxTimestamp", targetMarker.MaxTimestamp);

        return await command.ExecuteScalarAsync() as int?;
    }
}
/// <summary>
/// Counts the PackageStatistics rows older than the given timestamp.
/// Uses NOLOCK: a dirty estimate is acceptable for purge sizing.
/// </summary>
private async Task<int> GetNumberOfRecordsToPurge(SqlConnectionStringBuilder Source, DateTime minTimestampToKeep)
{
    var sql = @"
        SELECT COUNT(*)
        FROM PackageStatistics WITH (NOLOCK)
        WHERE [Timestamp] < @MinTimestampToKeep";

    using (var connection = await Source.ConnectTo())
    // Dispose the command deterministically (the original leaked it to the GC).
    using (var command = new SqlCommand(sql, connection))
    {
        command.Parameters.AddWithValue("@MinTimestampToKeep", minTimestampToKeep);

        // DBNull/failure falls back to 0 via the as-cast + null-coalesce.
        return await command.ExecuteScalarAsync() as int? ?? 0;
    }
}
/// <summary>
/// Bulk-inserts the given facts table into the target database inside a
/// serializable transaction; commits on success, rolls back and rethrows on
/// failure. Timing and failures are reported to Application Insights.
/// </summary>
internal async Task InsertDownloadFactsAsync(DataTable downloadFacts, string logFileName)
{
    Trace.WriteLine("Inserting into facts table...");
    var stopwatch = Stopwatch.StartNew();

    using (var connection = await _targetDatabase.ConnectTo())
    using (var transaction = connection.BeginTransaction(IsolationLevel.Serializable))
    // SqlBulkCopy implements IDisposable; the original never disposed it.
    using (var bulkCopy = new SqlBulkCopy(connection, SqlBulkCopyOptions.Default, transaction))
    {
        bulkCopy.DestinationTableName = downloadFacts.TableName;
        bulkCopy.BulkCopyTimeout = _defaultCommandTimeout;

        try
        {
            await bulkCopy.WriteToServerAsync(downloadFacts);
            transaction.Commit();

            stopwatch.Stop();
            ApplicationInsights.TrackMetric("Insert facts duration (ms)", stopwatch.ElapsedMilliseconds, logFileName);
        }
        catch (Exception exception)
        {
            if (stopwatch.IsRunning)
            {
                stopwatch.Stop();
            }
            ApplicationInsights.TrackException(exception, logFileName);
            transaction.Rollback();
            throw;
        }
    }

    Trace.Write(" DONE");
}
/// <summary>
/// Runs the [dbo].[RollUpDownloadFacts] stored procedure against the target
/// database. Exceptions propagate to the caller.
/// </summary>
public override async Task Run()
{
    using (var connection = await _targetDatabase.ConnectTo())
    {
        // Remove before adding so a reused handler is never subscribed twice.
        connection.InfoMessage -= OnSqlConnectionInfoMessage;
        connection.InfoMessage += OnSqlConnectionInfoMessage;

        // Dispose the command deterministically (the original leaked it to the GC).
        using (var sqlCommand = new SqlCommand("[dbo].[RollUpDownloadFacts]", connection))
        {
            sqlCommand.CommandType = CommandType.StoredProcedure;
            sqlCommand.CommandTimeout = 23 * 60 * 60; // the roll-up can run for most of a day
            sqlCommand.Parameters.AddWithValue("MinAgeInDays", _minAgeInDays);

            // The procedure produces no result set; ExecuteNonQueryAsync is
            // the correct call (the original used ExecuteScalarAsync).
            await sqlCommand.ExecuteNonQueryAsync();
        }
    }
}
/// <summary>
/// Builds an empty <see cref="DataTable"/> whose schema (including keys)
/// matches the given database table, minus the database-generated Timestamp column.
/// </summary>
/// <param name="tableName">Unqualified table name substituted into the SELECT TOP 1 template.</param>
public async Task<DataTable> GetDataTableAsync(string tableName)
{
    var dataTable = new DataTable();
    var query = string.Format(_sqlSelectTop1FromTable, tableName);

    using (var connection = await _targetDatabase.ConnectTo())
    {
        var tableAdapter = new SqlDataAdapter(query, connection)
        {
            MissingSchemaAction = MissingSchemaAction.AddWithKey
        };
        tableAdapter.Fill(dataTable);
    }

    // Only the schema is wanted; discard the sampled row.
    dataTable.Rows.Clear();

    // Remove Timestamp column from in-memory data table — it is auto-generated
    // on the database upon insert. Guard with Contains so a table without the
    // column doesn't throw ArgumentException (consistent with the sibling overload).
    if (dataTable.Columns.Contains("Timestamp"))
    {
        dataTable.Columns.Remove("Timestamp");
    }

    return dataTable;
}
/// <summary>
/// Retrieves the dirty package ids (id + last-updated timestamp) from the
/// warehouse via the [dbo].[GetDirtyPackageIds] stored procedure.
/// </summary>
private static async Task<IReadOnlyCollection<DirtyPackageId>> GetDirtyPackageIdsFromWarehouse(SqlConnectionStringBuilder sourceDatabase)
{
    using (var connection = await sourceDatabase.ConnectTo())
    // Dispose the command deterministically (the original leaked it to the GC).
    using (var command = new SqlCommand("[dbo].[GetDirtyPackageIds]", connection))
    {
        command.CommandType = CommandType.StoredProcedure;
        command.CommandTimeout = _commandTimeout;

        var packageIds = new List<DirtyPackageId>();
        using (var reader = await command.ExecuteReaderAsync())
        {
            while (await reader.ReadAsync())
            {
                // Column 0: package id, column 1: last-updated timestamp.
                packageIds.Add(new DirtyPackageId(reader.GetString(0), reader.GetDateTime(1)));
            }
        }
        return packageIds;
    }
}
/// <summary>
/// Determines the purge boundary: statistics older than the returned
/// timestamp are safe to delete. Null when no qualifying cursor window exists.
/// </summary>
private async Task<DateTime?> GetMinTimestampToKeep(SqlConnectionStringBuilder Destination)
{
    // Get the most recent cursor window that is older than the days we want to keep.
    // By getting the MAX(MinTimestamp), we'll delete statistics older than the beginning of the
    // most recent window that has begun processing (but isn't guaranteed to have completed).
    // Note that we made sure to treat DaysToKeep as a NEGATIVE number for the expected behavior
    var sql = @"
        SELECT MAX(MinTimestamp)
        FROM CollectorCursor
        WHERE MinTimestamp <= DATEADD(day, -ABS(@DaysToKeep), convert(date, GETUTCDATE()))";

    using (var connection = await Destination.ConnectTo())
    // Dispose the command deterministically (the original leaked it to the GC).
    using (var command = new SqlCommand(sql, connection))
    {
        command.Parameters.AddWithValue("@DaysToKeep", DaysToKeep);
        return await command.ExecuteScalarAsync() as DateTime?;
    }
}
/// <summary>
/// Returns the last replicated original key on the target (via the
/// GetLastOriginalKey proc's output parameter), or -1 when the target is empty.
/// </summary>
public static async Task<int> GetMaxTargetKey(SqlConnectionStringBuilder connectionString)
{
    using (var connection = await connectionString.ConnectTo())
    // Dispose the command deterministically (the original leaked it to the GC).
    using (var command = new SqlCommand("GetLastOriginalKey", connection) { CommandType = CommandType.StoredProcedure })
    {
        var param = new SqlParameter("@OriginalKey", SqlDbType.Int)
        {
            Direction = ParameterDirection.Output
        };
        command.Parameters.Add(param);

        await command.ExecuteNonQueryAsync();

        // The output parameter is DBNull when the target has no rows yet.
        int? maxOriginalKey = param.Value as int?;
        return maxOriginalKey ?? -1;
    }
}
/// <summary>
/// Writes a batch of download facts to the target and advances the cursor in
/// one transaction. Disposing an uncommitted transaction rolls everything
/// back, so a failed batch leaves the cursor untouched.
/// </summary>
private static async Task PutDownloadRecords(SqlConnectionStringBuilder target, XDocument batch, ReplicationTargetMarker currentCursor, ReplicationTargetMarker newCursor)
{
    using (var connection = await target.ConnectTo())
    using (var transaction = connection.BeginTransaction())
    using (var command = new SqlCommand("AddDownloadFacts", connection, transaction))
    {
        command.CommandType = CommandType.StoredProcedure;
        command.Parameters.AddWithValue("@facts", batch.ToString());
        command.Parameters.AddWithValue("@cursorMinTimestamp", currentCursor.MinTimestamp);
        command.Parameters.AddWithValue("@cursorMaxTimestamp", currentCursor.MaxTimestamp);
        command.Parameters.AddWithValue("@cursor", newCursor.Cursor);

        await command.ExecuteNonQueryAsync();
        transaction.Commit();
    }
}
/// <summary>
/// Deletes download facts within the timestamp window, looping because the
/// stored procedure removes at most 5000 rows per call (reported via its
/// return value). A fresh connection is opened for each batch.
/// </summary>
private static async Task ClearDownloadFacts(SqlConnectionStringBuilder target, DateTime minTimestamp, DateTime maxTimestamp)
{
    var totalCleared = 0;
    int clearedThisBatch;

    do
    {
        JobEventSourceLog.ClearingDownloadFacts(minTimestamp, maxTimestamp);

        using (var connection = await target.ConnectTo())
        {
            // This proc will delete 5000 records at a time, so we have to run in a loop until there's nothing more to delete
            using (var command = new SqlCommand("ClearDownloadFacts", connection) { CommandTimeout = 60 * 30 }) // 30-minute timeout
            {
                command.CommandType = CommandType.StoredProcedure;
                command.Parameters.AddWithValue("@minTimestamp", minTimestamp);
                command.Parameters.AddWithValue("@maxTimestamp", maxTimestamp);

                // Rows removed come back as the procedure's return value.
                var recordsCleared = new SqlParameter { Direction = ParameterDirection.ReturnValue };
                command.Parameters.Add(recordsCleared);

                await command.ExecuteNonQueryAsync();

                clearedThisBatch = (int)recordsCleared.Value;
                totalCleared += clearedThisBatch;
            }
        }

        JobEventSourceLog.ClearedDownloadFacts(clearedThisBatch, "Batch Completed.");
    }
    while (clearedThisBatch == 5000); // Hard-coded to match the stored proc - allows us to stop when done

    JobEventSourceLog.ClearedDownloadFacts(totalCleared, "Finished.");
}
/// <summary>
/// Executes the configured stored procedure with the given NVARCHAR parameters
/// and returns the result set as a <see cref="DataTable"/>.
/// </summary>
/// <param name="parameters">Item1 = parameter name, Item2 = nvarchar size, Item3 = value.</param>
private async Task<DataTable> ExecuteSql(params Tuple<string, int, string>[] parameters)
{
    using (var connection = await _sourceDatabase.ConnectTo())
    // Dispose the command deterministically (the original leaked it to the GC).
    using (var command = new SqlCommand(_procedureName, connection))
    {
        command.CommandType = CommandType.StoredProcedure;
        command.CommandTimeout = _commandTimeout;

        foreach (Tuple<string, int, string> parameter in parameters)
        {
            command.Parameters.Add(parameter.Item1, SqlDbType.NVarChar, parameter.Item2).Value = parameter.Item3;
        }

        var table = new DataTable();
        using (var reader = await command.ExecuteReaderAsync())
        {
            table.Load(reader);
        }
        return table;
    }
}
/// <summary>
/// Lists the package ids whose download reports are inactive as of the given
/// report generation time, via [dbo].[DownloadReportListInactive].
/// </summary>
public static async Task<IReadOnlyCollection<string>> ListInactivePackageIdReports(SqlConnectionStringBuilder sourceDatabase, DateTime reportGenerationTime, int commandTimeout)
{
    using (var connection = await sourceDatabase.ConnectTo())
    // Dispose the command deterministically (the original leaked it to the GC).
    using (var command = new SqlCommand("[dbo].[DownloadReportListInactive]", connection))
    {
        command.CommandType = CommandType.StoredProcedure;
        command.CommandTimeout = commandTimeout;
        command.Parameters.Add("ReportGenerationTime", SqlDbType.DateTime).Value = reportGenerationTime;

        var packageIds = new List<string>();
        using (var reader = await command.ExecuteReaderAsync())
        {
            while (await reader.ReadAsync())
            {
                packageIds.Add(reader.GetString(0));
            }
        }
        return packageIds;
    }
}
/// <summary>
/// Asks the target's CreateCursor procedure to narrow the source window to the
/// span that still needs replication, returning the effective target marker.
/// </summary>
private static async Task<ReplicationTargetMarker> GetReplicationTargetMarker(SqlConnectionStringBuilder target, ReplicationSourceMarker sourceMarker)
{
    using (var connection = await target.ConnectTo())
    using (var command = new SqlCommand("CreateCursor", connection) { CommandType = CommandType.StoredProcedure })
    {
        // InputOutput: the proc may rewrite both bounds.
        var minTimestamp = new SqlParameter("@minTimestamp", sourceMarker.MinTimestamp) { Direction = ParameterDirection.InputOutput };
        var maxTimestamp = new SqlParameter("@maxTimestamp", sourceMarker.MaxTimestamp) { Direction = ParameterDirection.InputOutput };
        command.Parameters.Add(minTimestamp);
        command.Parameters.Add(maxTimestamp);

        await command.ExecuteNonQueryAsync();

        // If the min/max pair is null then that means there are no records missing
        // from the target. So we use the MaxTimestamp as the null value for BOTH
        // as that will result in no records to replicate, but we also set the flag.
        var effectiveMin = (minTimestamp.Value as DateTime?) ?? sourceMarker.MaxTimestamp;
        var effectiveMax = (maxTimestamp.Value as DateTime?) ?? sourceMarker.MaxTimestamp;

        return new ReplicationTargetMarker
        {
            MinTimestamp = effectiveMin,
            MaxTimestamp = effectiveMax,
            TimeWindowNeedsReplication = effectiveMin < effectiveMax
        };
    }
}
/// <summary>
/// Copies per-package download counts from the statistics warehouse into the
/// destination gallery database: reads under snapshot isolation, stages rows
/// in a temp table via bulk copy, then runs the update proc.
/// </summary>
/// <returns>true on success (or no data); false on any error (logged).</returns>
public override async Task<bool> Run()
{
    try
    {
        Trace.TraceInformation("Updating Download Counts from {0}/{1} to {2}/{3}.", _statisticsDatabase.DataSource, _statisticsDatabase.InitialCatalog, _destinationDatabase.DataSource, _destinationDatabase.InitialCatalog);

        // Gather download counts data from statistics warehouse
        IReadOnlyCollection<DownloadCountData> downloadData;
        Trace.TraceInformation("Gathering Download Counts from {0}/{1}...", _statisticsDatabase.DataSource, _statisticsDatabase.InitialCatalog);
        using (var statisticsDatabase = await _statisticsDatabase.ConnectTo())
        using (var statisticsDatabaseTransaction = statisticsDatabase.BeginTransaction(IsolationLevel.Snapshot))
        {
            downloadData = (await statisticsDatabase.QueryWithRetryAsync<DownloadCountData>(
                StoredProcedureName,
                transaction: statisticsDatabaseTransaction,
                commandType: CommandType.StoredProcedure)).ToList();
        }

        Trace.TraceInformation("Gathered {0} rows of data.", downloadData.Count);

        if (downloadData.Any())
        {
            // Group based on Package Id. Materialize once: GroupBy is lazy and
            // the original's trailing Count() call re-grouped the whole dataset.
            var packageRegistrationGroups = downloadData.GroupBy(p => p.PackageId).ToList();

            using (var destinationDatabase = await _destinationDatabase.ConnectTo())
            {
                // Fetch package registrations so we can match
                Trace.TraceInformation("Retrieving package registrations...");
                var packageRegistrationLookup = (
                    await destinationDatabase.QueryWithRetryAsync<PackageRegistrationData>(
                        "SELECT [Key], LOWER([Id]) AS Id FROM [dbo].[PackageRegistrations]"))
                    .Where(item => !string.IsNullOrEmpty(item.Id))
                    .ToDictionary(item => item.Id, item => item.Key);
                Trace.TraceInformation("Retrieved package registrations.");

                // Create a temporary table
                Trace.TraceInformation("Creating temporary table...");
                await destinationDatabase.ExecuteAsync(CreateTempTable);

                // Load the temp table schema; dispose the command and reader
                // deterministically (the original leaked both to the GC).
                var aggregateCdnDownloadsInGalleryTable = new DataTable();
                using (var command = new SqlCommand("SELECT * FROM " + TempTableName, destinationDatabase))
                {
                    command.CommandType = CommandType.Text;
                    command.CommandTimeout = 60 * 5;
                    using (var reader = await command.ExecuteReaderAsync())
                    {
                        aggregateCdnDownloadsInGalleryTable.Load(reader);
                    }
                }
                aggregateCdnDownloadsInGalleryTable.Rows.Clear();
                Trace.TraceInformation("Created temporary table.");

                // Populate temporary table in memory
                Trace.TraceInformation("Populating temporary table in memory...");
                foreach (var packageRegistrationGroup in packageRegistrationGroups)
                {
                    // don't process empty package id's
                    if (string.IsNullOrEmpty(packageRegistrationGroup.First().PackageId))
                    {
                        continue;
                    }

                    var packageId = packageRegistrationGroup.First().PackageId.ToLowerInvariant();

                    // Get package registration key
                    if (!packageRegistrationLookup.ContainsKey(packageId))
                    {
                        continue;
                    }
                    var packageRegistrationKey = packageRegistrationLookup[packageId];

                    // Set download count on individual packages
                    foreach (var package in packageRegistrationGroup)
                    {
                        var row = aggregateCdnDownloadsInGalleryTable.NewRow();
                        row["PackageRegistrationKey"] = packageRegistrationKey;
                        row["PackageVersion"] = package.PackageVersion;
                        row["DownloadCount"] = package.TotalDownloadCount;
                        aggregateCdnDownloadsInGalleryTable.Rows.Add(row);
                    }
                }
                Trace.TraceInformation("Populated temporary table in memory. (" + aggregateCdnDownloadsInGalleryTable.Rows.Count + " rows)");

                // Transfer to SQL database
                Trace.TraceInformation("Populating temporary table in database...");
                using (SqlBulkCopy bulkcopy = new SqlBulkCopy(destinationDatabase))
                {
                    bulkcopy.BulkCopyTimeout = 60 * 30; // 30 minutes
                    bulkcopy.DestinationTableName = TempTableName;
                    bulkcopy.WriteToServer(aggregateCdnDownloadsInGalleryTable);
                    bulkcopy.Close();
                }
                Trace.TraceInformation("Populated temporary table in database.");

                // Update counts in destination database
                Trace.TraceInformation("Updating destination database Download Counts... (" + packageRegistrationGroups.Count + " package registrations to process)");
                await destinationDatabase.ExecuteAsync(UpdateFromTempTable, timeout: 60 * 30); // 30 minutes
                Trace.TraceInformation("Updated destination database Download Counts.");
            }
        }
        return true;
    }
    catch (Exception exception)
    {
        Trace.TraceError(exception.ToString());
        return false;
    }
}
// Patches client-dimension links in the warehouse: gathers user agent strings
// (either those linked to the "(unknown)" client, or those linked to a specific
// _targetClientName), parses them against knownclients.yaml, ensures the parsed
// client dimensions exist, and re-links facts whose dimension changed.
// Errors are traced and swallowed (best-effort; the next run retries).
private static async Task Run()
{
    try
    {
        using (var connection = await _targetDatabase.ConnectTo())
        {
            IDictionary<string, Tuple<int, int>> linkedUserAgents;

            // This dictionary uses the user agent string as the key.
            // The first item of the Tuple-value contains the user agent ID.
            // The seconds item in the Tuple will contain the ClientDimension as parsed from the user agent using knownclients.yaml.
            IDictionary<string, Tuple<int, ClientDimension>> currentUserAgentInfo = null;

            if (string.IsNullOrWhiteSpace(_targetClientName))
            {
                // Patch only unknown clients

                // 0. Get a distinct collection of all useragents linked to (unknown) client
                linkedUserAgents = await Warehouse.GetUnknownUserAgents(connection);

                if (linkedUserAgents.Any())
                {
                    // 1. Parse them and detect the ones that are recognized by the parser
                    currentUserAgentInfo = ParseUserAgentsAndLinkToClientDimension(linkedUserAgents);
                }
            }
            else
            {
                // Patch only clients already linked to the TargetClientName

                // 0. Get a distinct collection of all useragents linked to TargetClientName
                linkedUserAgents = await Warehouse.GetLinkedUserAgents(connection, _targetClientName, _userAgentFilter);

                if (!linkedUserAgents.Any())
                {
                    // The client dimension does not exist yet?
                    // Look for the unknowns then...

                    // 0. Get a distinct collection of all useragents linked to (unknown) client
                    linkedUserAgents = await Warehouse.GetUnknownUserAgents(connection);
                }

                // 1. Parse them and detect the ones that are recognized by the parser
                // These user agents are linked to newly parsed client dimensions.
                currentUserAgentInfo = ParseUserAgentsAndLinkToClientDimension(linkedUserAgents);
            }

            if (currentUserAgentInfo != null && currentUserAgentInfo.Any())
            {
                // 2. Enumerate recognized user agents and ensure dimensions exists
                // This resultset may contain updated links between user agent and client dimension id.
                var updatedUserAgentInfo = await Warehouse.EnsureClientDimensionsExist(connection, currentUserAgentInfo);

                // 3. Determine the updated links between user agent and client dimension ID
                // by comparing the resultset of step 2 with the original links found in step 0.
                var changedLinks = FindChangedLinksBetweenUserAgentAndClientDimensionId(currentUserAgentInfo, updatedUserAgentInfo);

                // 4. Link the new client dimension to the facts
                if (changedLinks.Any())
                {
                    await Warehouse.PatchClientDimension(connection, changedLinks);
                }
            }
        }
    }
    catch (Exception exception)
    {
        // NOTE(review): failures are only traced, never rethrown — presumably
        // intentional best-effort behavior; confirm with the job scheduler.
        Trace.TraceError(exception.ToString());
    }
}
/// <summary>
/// Pulls per-package download counts from the statistics warehouse and applies
/// them to the destination (gallery) database in throttled batches, sleeping
/// between batches to limit destination load. Exceptions propagate to the caller.
/// </summary>
public override async Task Run()
{
    // Gather download counts data from statistics warehouse
    IReadOnlyList<DownloadCountData> downloadData;

    Logger.LogInformation("Using batch size {BatchSize} and batch sleep seconds {BatchSleepSeconds}.", _batchSize, _batchSleepSeconds);
    Logger.LogInformation("Gathering Download Counts from {DataSource}/{InitialCatalog}...", _statisticsDatabase.DataSource, _statisticsDatabase.InitialCatalog);

    var stopwatch = Stopwatch.StartNew();

    // Snapshot isolation: read a consistent view without blocking warehouse writers.
    using (var statisticsDatabase = await _statisticsDatabase.ConnectTo())
    using (var statisticsDatabaseTransaction = statisticsDatabase.BeginTransaction(IsolationLevel.Snapshot))
    {
        downloadData = (
            await statisticsDatabase.QueryWithRetryAsync<DownloadCountData>(
                _storedProcedureName,
                transaction: statisticsDatabaseTransaction,
                commandType: CommandType.StoredProcedure,
                commandTimeout: TimeSpan.FromMinutes(15),
                maxRetries: 3))
            .ToList();
    }

    Logger.LogInformation(
        "Gathered {RecordCount} rows of data (took {DurationSeconds} seconds).",
        downloadData.Count,
        stopwatch.Elapsed.TotalSeconds);

    if (!downloadData.Any())
    {
        Logger.LogInformation("No download data to process.");
        return;
    }

    using (var destinationDatabase = await _destinationDatabase.ConnectTo())
    {
        // Fetch package registrations so we can match package ID to package registration key.
        var packageRegistrationLookup = await GetPackageRegistrations(destinationDatabase);

        // Group based on package ID and store in a stack for easy incremental processing.
        var allGroups = downloadData.GroupBy(p => p.PackageId).ToList();
        // Drop groups whose package ID has no registration in the gallery.
        var filteredGroups = allGroups.Where(g => IsValidGroup(packageRegistrationLookup, g)).ToList();
        var removedCount = allGroups.Count - filteredGroups.Count;

        Logger.LogInformation("{TotalGroupCount} package ID groups were found in the statistics database.", allGroups.Count);
        Logger.LogInformation("{RemovedGroupCount} package ID groups were filtered out because they aren't in the gallery database.", removedCount);
        Logger.LogInformation("{RemainingGroupCount} package ID groups will be processed.", filteredGroups.Count);

        var remainingGroups = new Stack<IPackageIdGroup>(filteredGroups);

        stopwatch.Restart();

        while (remainingGroups.Any())
        {
            // Create a batch of one or more package registrations to update.
            var batch = PopGroupBatch(remainingGroups, _batchSize);

            await ProcessBatch(batch, destinationDatabase, packageRegistrationLookup);

            Logger.LogInformation(
                "There are {GroupCount} package registration groups remaining.",
                remainingGroups.Count);

            if (remainingGroups.Any())
            {
                // Throttle so sustained writes don't starve the destination database.
                Logger.LogInformation("Sleeping for {BatchSleepSeconds} seconds before continuing.", _batchSleepSeconds);
                await Task.Delay(TimeSpan.FromSeconds(_batchSleepSeconds));
            }
        }

        Logger.LogInformation(
            "It took {DurationSeconds} seconds to update all download counts.",
            stopwatch.Elapsed.TotalSeconds);
    }
}
/// <summary>
/// Sends expiring/expired API-key notification e-mails to gallery users.
/// A blob-stored cursor records who was contacted recently so nobody is
/// e-mailed again within _allowEmailResendAfterDays days; the cursor is
/// persisted in the finally block even when the run fails.
/// </summary>
/// <returns>true on success; false when the run failed (failure is logged).</returns>
public override async Task<bool> Run()
{
    try
    {
        List<ExpiredCredentialData> expiredCredentials = null;

        // Who did we contact before?
        if (_storage.Exists(_cursorFile))
        {
            string content = await _storage.LoadString(_storage.ResolveUri(_cursorFile), CancellationToken.None);
            // Load from cursor
            var contactedUsers = JsonConvert.DeserializeObject<Dictionary<string, DateTimeOffset>>(content);

            // Clean older entries (contacted in last _allowEmailResendAfterDays)
            var referenceDate = DateTimeOffset.UtcNow.AddDays(-1 * _allowEmailResendAfterDays);
            foreach (var kvp in contactedUsers.Where(kvp => kvp.Value >= referenceDate))
            {
                _contactedUsers.AddOrUpdate(kvp.Key, kvp.Value, (s, offset) => kvp.Value);
            }
        }

        // Connect to database
        using (var galleryConnection = await _galleryDatabase.ConnectTo())
        {
            // Fetch credentials that expire in _warnDaysBeforeExpiration days
            // + the user's e-mail address
            _logger.LogInformation("Retrieving expired credentials from {InitialCatalog}...", _galleryDatabase.InitialCatalog);

            expiredCredentials = (await galleryConnection.QueryWithRetryAsync<ExpiredCredentialData>(
                string.Format(Strings.GetExpiredCredentialsQuery, _warnDaysBeforeExpiration),
                maxRetries: 3,
                commandTimeout: _defaultCommandTimeout)).ToList();

            _logger.LogInformation("Retrieved {ExpiredCredentials} expired credentials.", expiredCredentials.Count);
        }

        // Add default description for non-scoped API keys
        expiredCredentials
            .Where(cred => string.IsNullOrEmpty(cred.Description))
            .ToList()
            .ForEach(ecd => ecd.Description = Constants.NonScopedApiKeyDescription);

        // Group credentials for each user
        var userToExpiredCredsMapping = expiredCredentials
            .GroupBy(x => x.Username)
            .ToDictionary(user => user.Key, value => value.ToList());

        // Handle expiring credentials
        var jobRunTime = DateTimeOffset.UtcNow;
        foreach (var userCredMapping in userToExpiredCredsMapping)
        {
            var username = userCredMapping.Key;
            var credentialList = userCredMapping.Value;

            // Split credentials into two lists: Expired and Expiring to aggregate messages
            var expiringCredentialList = credentialList
                .Where(x => (x.Expires - jobRunTime).TotalDays > 0)
                .ToList();
            var expiredCredentialList = credentialList
                .Where(x => (x.Expires - jobRunTime).TotalDays <= 0)
                .ToList();

            DateTimeOffset userContactTime;
            if (!_contactedUsers.TryGetValue(username, out userContactTime))
            {
                // send expiring API keys email notification
                await HandleExpiredCredentialEmail(username, expiringCredentialList, jobRunTime, expired: false);

                // send expired API keys email notification
                await HandleExpiredCredentialEmail(username, expiredCredentialList, jobRunTime, expired: true);
            }
            else
            {
                // User was contacted within the resend window — skip.
                _logger.LogDebug("Skipping expired credential for user {Username} - already handled at {JobRuntime}.", username, userContactTime);
            }
        }
    }
    catch (Exception ex)
    {
        _logger.LogCritical(LogEvents.JobRunFailed, ex, "Job run failed!");
        return false;
    }
    finally
    {
        // Make sure we know who has been contacted today, so they do not get double
        // e-mail notifications.
        string json = JsonConvert.SerializeObject(_contactedUsers);
        var content = new StringStorageContent(json, "application/json");
        await _storage.Save(_storage.ResolveUri(_cursorFile), content, CancellationToken.None);
    }

    return true;
}
/// <summary>
/// Aggregates CDN download counts from the statistics warehouse into the
/// destination gallery database: reads under snapshot isolation, stages rows
/// in a temp table via bulk copy, then runs the update statement.
/// </summary>
/// <returns>true on success (or no data); false on any error (logged).</returns>
public override async Task<bool> Run()
{
    try
    {
        // Gather download counts data from statistics warehouse
        IReadOnlyCollection<DownloadCountData> downloadData;
        _logger.LogDebug("Gathering Download Counts from {0}/{1}...", _statisticsDatabase.DataSource, _statisticsDatabase.InitialCatalog);

        using (var statisticsDatabase = await _statisticsDatabase.ConnectTo())
        using (var statisticsDatabaseTransaction = statisticsDatabase.BeginTransaction(IsolationLevel.Snapshot))
        {
            downloadData = (
                await statisticsDatabase.QueryWithRetryAsync<DownloadCountData>(
                    _storedProcedureName,
                    transaction: statisticsDatabaseTransaction,
                    commandType: CommandType.StoredProcedure,
                    commandTimeout: TimeSpan.FromMinutes(15),
                    maxRetries: 3))
                .ToList();
        }

        _logger.LogInformation("Gathered {RecordCount} rows of data.", downloadData.Count);

        if (downloadData.Any())
        {
            // Group based on Package Id
            var packageRegistrationGroups = downloadData.GroupBy(p => p.PackageId).ToList();

            using (var destinationDatabase = await _destinationDatabase.ConnectTo())
            {
                // Fetch package registrations so we can match
                var packageRegistrationLookup = await GetPackageRegistrations(destinationDatabase);

                // Create a temporary table
                _logger.LogDebug("Creating temporary table...");
                await destinationDatabase.ExecuteAsync(_createTempTable);

                // Load the temp table schema; dispose the command and reader
                // deterministically (the original leaked both to the GC).
                var aggregateCdnDownloadsInGalleryTable = new DataTable();
                using (var command = new SqlCommand("SELECT * FROM " + _tempTableName, destinationDatabase))
                {
                    command.CommandType = CommandType.Text;
                    command.CommandTimeout = (int)TimeSpan.FromMinutes(10).TotalSeconds;
                    using (var reader = await command.ExecuteReaderAsync())
                    {
                        aggregateCdnDownloadsInGalleryTable.Load(reader);
                    }
                }
                aggregateCdnDownloadsInGalleryTable.Rows.Clear();
                aggregateCdnDownloadsInGalleryTable.TableName = $"dbo.{_tempTableName}";
                _logger.LogInformation("Created temporary table.");

                // Populate temporary table in memory
                _logger.LogDebug("Populating temporary table in memory...");
                foreach (var packageRegistrationGroup in packageRegistrationGroups)
                {
                    // don't process empty package id's
                    if (string.IsNullOrEmpty(packageRegistrationGroup.First().PackageId))
                    {
                        continue;
                    }

                    var packageId = packageRegistrationGroup.First().PackageId.ToLowerInvariant();

                    // Get package registration key
                    if (!packageRegistrationLookup.ContainsKey(packageId))
                    {
                        continue;
                    }
                    var packageRegistrationKey = packageRegistrationLookup[packageId];

                    // Set download count on individual packages
                    foreach (var package in packageRegistrationGroup)
                    {
                        var row = aggregateCdnDownloadsInGalleryTable.NewRow();
                        row["PackageRegistrationKey"] = packageRegistrationKey;
                        row["PackageVersion"] = package.PackageVersion;
                        row["DownloadCount"] = package.TotalDownloadCount;
                        aggregateCdnDownloadsInGalleryTable.Rows.Add(row);
                    }
                }
                _logger.LogInformation("Populated temporary table in memory. ({RecordCount} rows).", aggregateCdnDownloadsInGalleryTable.Rows.Count);

                // Transfer to SQL database
                _logger.LogDebug("Populating temporary table in database...");
                using (SqlBulkCopy bulkcopy = new SqlBulkCopy(destinationDatabase))
                {
                    bulkcopy.BulkCopyTimeout = (int)TimeSpan.FromMinutes(30).TotalSeconds;
                    bulkcopy.DestinationTableName = _tempTableName;
                    bulkcopy.WriteToServer(aggregateCdnDownloadsInGalleryTable);
                    bulkcopy.Close();
                }
                _logger.LogInformation("Populated temporary table in database.");

                // Update counts in destination database
                _logger.LogDebug("Updating destination database Download Counts... ({RecordCount} package registrations to process).", packageRegistrationGroups.Count);
                await destinationDatabase.ExecuteAsync(_updateFromTempTable, commandTimeout: TimeSpan.FromMinutes(30));
                _logger.LogInformation("Updated destination database Download Counts.");
            }
        }
    }
    catch (Exception exception)
    {
        // Pass the exception object itself so the logging provider captures
        // the full exception/stack trace (the original passed it as a message arg).
        _logger.LogCritical(exception, "Job run failed!");
        return false;
    }
    return true;
}