/// <summary>
/// Back-fills the time-series store with per-user edit counts for the 14 days
/// preceding the moment the method is invoked, walking forward in one-hour windows.
/// </summary>
/// <param name="cancellationToken">
/// Checked after every window; also passed to the time-series ingestion and the final cache flush.
/// </param>
/// <returns>A task that completes when seeding has finished, been aborted, or been cancelled.</returns>
public async Task RunSeedingAsync(CancellationToken cancellationToken)
{
    _logger.LogInformation("Start seeding data of 14 days");

    // Log in only when the API service has no identity yet.
    if (_apiSvc.CurrentIdentity == null)
    {
        var identity = await _apiSvc.LoginAsync();
        if (identity == null)
        {
            _logger.LogError("MW failed to log in");
            return;
        }
    }

    var end = DateTimeOffset.UtcNow;
    var windowStart = end.Subtract(TimeSpan.FromDays(14));
    int retryCount = 0;
    bool retriesExhausted = false;

    do
    {
        try
        {
            var windowEnd = windowStart.AddHours(1);
            var changes = await _apiSvc.GetRecentChangesSinceAsync(windowStart, 0, windowEnd);
            _logger.LogInformation($"Get changes from {windowStart} to {windowEnd}: {changes.Count} items");

            // One sample per user for this window: the number of changes attributed to
            // that user, stamped with the start of the window.
            var tsSamples = changes
                .Where(change => change.User != null)
                .GroupBy(change => change.User)
                .Select(userChanges => new ContribSampleEntity(userChanges.Key, windowStart, userChanges.Count()))
                .ToList();

            await _tsdb.IngestSamplesAsync(tsSamples, cancellationToken);

            // Advance only after a successful ingestion so a failed window is retried.
            windowStart = windowEnd;
            retryCount = 0;
        }
        catch (OperationCanceledException) when (cancellationToken.IsCancellationRequested)
        {
            // Cancellation is an expected way to stop; do not report it as an error
            // or spend a retry on it.
            break;
        }
        catch (Exception exc)
        {
            _logger.LogError(exc, "Caught an error and enter retry.");
            if (retryCount > 3)
            {
                retriesExhausted = true;
                break;
            }

            retryCount++;
        }
    }
    while (windowStart < end && !cancellationToken.IsCancellationRequested);

    await _tsdb.FlushCacheAsync(cancellationToken);

    // Report the real outcome instead of always claiming success.
    if (retriesExhausted)
    {
        _logger.LogError("Seeding aborted after repeated failures");
    }
    else if (windowStart < end)
    {
        _logger.LogWarning("Seeding cancelled before completion");
    }
    else
    {
        _logger.LogInformation("Seeding completed");
    }
}
/// <summary>
/// Runs the worker's polling loop until cancellation is requested. Each pass refreshes the
/// MW session when it is older than <c>LoginValidity</c>, runs time-series garbage collection
/// when the last run is more than 14 days old, fetches changes since the persisted time stamp,
/// publishes the newest change per title in batches, ingests per-edit samples into the
/// time-series store, flushes the time-series cache hourly, and then waits 15 seconds.
/// </summary>
/// <param name="cancellationToken">Token that stops the loop and interrupts the waits.</param>
/// <returns>
/// A task that completes when the loop ends or when the session cannot be refreshed after
/// three attempts.
/// </returns>
/// <exception cref="OperationCanceledException">
/// Propagated when cancellation interrupts one of the waits; the time-series cache is flushed
/// before the exception leaves this method.
/// </exception>
public async Task RunMainLoopAsync(CancellationToken cancellationToken)
{
    try
    {
        // NOTE: this local must keep its name; nameof(lastAccess) is the key used in the state repository.
        DateTimeOffset lastAccess;
        while (!cancellationToken.IsCancellationRequested)
        {
            // For long-run, token will self-refresh.
            if (DateTimeOffset.Now - _lastLogin > LoginValidity)
            {
                _logger.LogInformation("Refresh MW token");

                // 3 retries with some cool-down period
                IPrincipal principal = null;
                for (int i = 0; i < 3; i++)
                {
                    principal = await RefreshSessionAsync();
                    if (principal == null)
                    {
                        _logger.LogWarning("MW Failed to log in; retries after 20s");
                        await Task.Delay(TimeSpan.FromSeconds(20), cancellationToken);
                    }
                    else
                    {
                        break;
                    }
                }

                if (principal == null)
                {
                    _logger.LogError("MW failed to log in; worker exit");
                    return;
                }
            }

            // Also run GC periodically
            if (DateTimeOffset.Now - _lastGcRun > TimeSpan.FromDays(14))
            {
                try
                {
                    await _tsdb.RunGarbageCollectionInXTableAsync(cancellationToken);
                    _lastGcRun = DateTimeOffset.Now;
                }
                catch (Exception exc)
                {
                    // Best effort: a failed GC must not stop ingestion.
                    _logger.LogError(exc, "Failed to run TS GC");
                }
            }

            // Get last access time stamp from database
            lastAccess = _stateRepo.Get<DateTimeOffset>(nameof(lastAccess));

            // Captured before the request so the stored stamp is not later than the fetch itself.
            var currentTime = DateTimeOffset.Now;
            List<ContentPageChangeEventArgs> changeLists = null;
            try
            {
                changeLists = await _apiSvc.GetRecentChangesSinceAsync(lastAccess);
            }
            catch (Exception exc)
            {
                _logger.LogError(exc, "MW failed to retrieve recent changes");
                _telemetryClient.TrackException(exc, new Dictionary<string, string>
                {
                    { "Category", "GetRecentChangesSinceAsync" }
                });
                await Task.Delay(TimeSpan.FromSeconds(15), cancellationToken);
                continue;
            }

            _logger.LogInformation($"{changeLists.Count} item(s) retrieved");
            _telemetryClient.TrackEvent("NewItemIngestion", null, new Dictionary<string, double>
            {
                { "Count", changeLists.Count }
            });

            // Update time stamp if new item retrieved
            if (changeLists.Count > 0)
            {
                lastAccess = currentTime;
                _stateRepo.Put(nameof(lastAccess), lastAccess);
            }

            // Enqueue update events into message topics, send in batch manner
            var clusteredUpdates = changeLists.Where(i => i.Title != null).GroupBy(i => i.Title);
            var updateMessages = new List<Message>();
            foreach (var updateSeries in clusteredUpdates)
            {
                // Only the change with the highest changeset id is published for each title.
                var topItem = updateSeries.OrderByDescending(i => i.ChangesetId).First();

                // ToArray() returns exactly the bytes written. GetBuffer() exposes the whole
                // internal buffer, whose tail beyond the stream length is unused padding.
                byte[] payload;
                using (var memoryStream = new MemoryStream())
                {
                    _formatter.Serialize(memoryStream, topItem);
                    payload = memoryStream.ToArray();
                }

                updateMessages.Add(new Message(payload));
                if (updateMessages.Count > 50)
                {
                    await SendBatchMessagesWithRetry(updateMessages);
                    updateMessages.Clear();
                }
            }

            // Final one in message bus
            await SendBatchMessagesWithRetry(updateMessages);
            updateMessages.Clear();

            // Ingest edit stats into TS
            try
            {
                var tsSamples = changeLists
                    .Where(g => g.User != null)
                    .Select(k => new ContribSampleEntity(k.User, k.EventTimeStamp, 1));
                await _tsdb.IngestSamplesAsync(tsSamples.ToList(), cancellationToken);
            }
            catch (Exception exc)
            {
                _logger.LogError(exc, "TS failed to ingest edits");
            }

            // Flush TSDB cache every hour
            if (DateTimeOffset.Now - _lastCacheFlush >= TimeSpan.FromHours(1))
            {
                await _tsdb.FlushCacheAsync(cancellationToken);
                _lastCacheFlush = DateTimeOffset.Now;
            }

            await Task.Delay(TimeSpan.FromSeconds(15), cancellationToken);
        }
    }
    catch (OperationCanceledException) when (cancellationToken.IsCancellationRequested)
    {
        // The cancellable waits above throw on shutdown, which would otherwise bypass the
        // final flush below. Flush first, then let the cancellation propagate unchanged.
        await _tsdb.FlushCacheAsync(default(CancellationToken));
        throw;
    }

    // Flush local TSDB cache
    await _tsdb.FlushCacheAsync(default(CancellationToken));
}