/// <summary>
/// Requests the thread listing of a board and returns pointers to the threads whose
/// last-modified timestamp is newer than the cutoff for this cycle.
/// </summary>
/// <remarks>
/// On the first cycle the cutoff is 0, and the listing is first pruned against
/// <c>ThreadConsumer.CheckExistingThreads</c>: a listed thread is dropped when its
/// last-modified value is not newer than the last post time reported for it.
/// On later cycles the cutoff is the GMT timestamp of <paramref name="lastDateTimeCheck"/>.
/// A delay of <c>ApiCooldownTimespan</c> is started on entry and awaited before returning.
/// </remarks>
/// <param name="token">Token to cancel the request and the cooldown delay.</param>
/// <param name="board">The board to retrieve threads from.</param>
/// <param name="lastDateTimeCheck">The time of the previous check; passed to the API call and used as the cutoff on non-first cycles.</param>
/// <param name="firstRun">True on the first cycle of the archival loop; enables the existing-thread lookup and sets the cutoff to 0.</param>
/// <returns>
/// Pointers (board + thread number) for every qualifying thread. The list is empty when the
/// board was not modified or the request did not succeed.
/// </returns>
public async Task<IList<ThreadPointer>> GetBoardThreads(CancellationToken token, string board, DateTimeOffset lastDateTimeCheck, bool firstRun)
{
    // Start the cooldown immediately so it elapses while the request is being processed.
    var cooldown = Task.Delay(ApiCooldownTimespan, token);

    var pointers = new List<ThreadPointer>();

    // The request runs through the generic retry policy (constructed with the argument 12).
    var boardResponse = await NetworkPolicies.GenericRetryPolicy<ApiResponse<Page[]>>(12).ExecuteAsync(async () =>
    {
        Program.Log($"Requesting threads from board /{board}/...");

        await using (var rentedClient = await ProxyProvider.RentHttpClient())
        {
            return await YotsubaApi.GetBoard(board, rentedClient.Object.Client, lastDateTimeCheck, token);
        }
    });

    var status = boardResponse.ResponseType;

    if (status == ResponseType.Ok)
    {
        var candidates = boardResponse.Data.SelectMany(page => page.Threads).ToList();

        if (firstRun)
        {
            var knownThreads = await ThreadConsumer.CheckExistingThreads(candidates.Select(c => c.ThreadNumber), board, false, true);

            foreach (var known in knownThreads)
            {
                var listed = candidates.First(c => c.ThreadNumber == known.threadId);

                // Nothing new since the stored last post: no need to fetch this thread again.
                if (listed.LastModified <= Utility.GetGMTTimestamp(known.lastPostTime))
                {
                    candidates.Remove(listed);
                }
            }
        }

        uint cutoff;
        if (firstRun)
        {
            cutoff = 0;
        }
        else
        {
            cutoff = Utility.GetGMTTimestamp(lastDateTimeCheck);
        }

        pointers.AddRange(candidates
            .Where(c => c.LastModified > cutoff)
            .Select(c => new ThreadPointer(board, c.ThreadNumber)));

        Program.Log($"Enqueued {pointers.Count} threads from board /{board}/ past timestamp {cutoff}");
    }
    else if (status != ResponseType.NotModified)
    {
        // NotFound and any other response type are reported the same way.
        Program.Log($"Unable to index board /{board}/, is there a connection error?");
    }

    await cooldown;
    return pointers;
}
/// <summary>
/// Runs the archival loop until cancellation is requested.
/// </summary>
/// <remarks>
/// Each cycle:
/// 1. polls every board in <c>Config.Boards</c> and queues threads modified after that board's
///    last successful check (all listed threads on the first cycle);
/// 2. on the first cycle only, also queues archived thread IDs that
///    <c>ThreadConsumer.CheckExistingThreads</c> does not report as existing;
/// 3. runs <c>ThreadUpdateTask</c> for every queued thread, interleaving boards, with at most
///    20 updates in flight;
/// 4. carries threads whose update returned false over into the next cycle, then waits for the
///    <c>BoardUpdateTimespan</c> delay that was started before step 3.
/// </remarks>
/// <param name="token">
/// Cancels the loop. It is passed to the API waits/requests and to the inter-cycle delay, so
/// cancellation may surface as an exception from one of those awaits rather than through the
/// loop condition. Thread updates already dispatched are run with <c>CancellationToken.None</c>.
/// </param>
public async Task Execute(CancellationToken token)
{
    // Started for its side effects; the returned task is not awaited anywhere in this method.
    // NOTE(review): presumably this replenishes APISemaphore, which is only ever waited on here — confirm.
    var semaphoreTask = SemaphoreUpdateTask(token);

    // Bounds the number of concurrently running thread updates.
    var threadSemaphore = new SemaphoreSlim(20);
    bool firstRun = true;

    var threadQueue = new HashSet<(string board, ulong threadNumber)>();
    var lastBoardCheckTimes = new SortedList<string, DateTimeOffset>(Config.Boards.Length);

    while (!token.IsCancellationRequested)
    {
        foreach (string board in Config.Boards)
        {
            bool hasPreviousCheck = lastBoardCheckTimes.TryGetValue(board, out DateTimeOffset lastDateTimeCheck);
            if (!hasPreviousCheck)
            {
                lastDateTimeCheck = DateTimeOffset.MinValue;
            }

            // A board with no recorded successful check is treated like the first run (cutoff 0),
            // so the timestamp conversion is never handed DateTimeOffset.MinValue.
            uint lastCheckTimestamp = (firstRun || !hasPreviousCheck) ? 0 : Utility.GetNewYorkTimestamp(lastDateTimeCheck);

            DateTimeOffset beforeCheckTime = DateTimeOffset.Now;
            bool boardIndexed = false;

            await APISemaphore.WaitAsync(token);
            var pagesRequest = await YotsubaApi.GetBoard(board, lastDateTimeCheck, token);

            switch (pagesRequest.ResponseType)
            {
                case YotsubaResponseType.Ok:
                    int newCount = 0;

                    foreach (var thread in pagesRequest.Pages.SelectMany(x => x.Threads))
                    {
                        if (thread.LastModified > lastCheckTimestamp)
                        {
                            threadQueue.Add((board, thread.ThreadNumber));
                            newCount++;
                        }
                    }

                    Program.Log($"Enqueued {newCount} threads from board /{board}/ past timestamp {lastCheckTimestamp}");
                    boardIndexed = true;
                    break;

                case YotsubaResponseType.NotModified:
                    boardIndexed = true;
                    break;

                case YotsubaResponseType.NotFound:
                default:
                    Program.Log($"Unable to index board /{board}/, is there a connection error?");
                    break;
            }

            if (firstRun)
            {
                await APISemaphore.WaitAsync(token);
                var archiveRequest = await YotsubaApi.GetArchive(board, lastDateTimeCheck, token);

                switch (archiveRequest.ResponseType)
                {
                    case YotsubaResponseType.Ok:
                        var existingArchivedThreads = await ThreadConsumer.CheckExistingThreads(archiveRequest.ThreadIds, board, true);

                        Program.Log($"Found {existingArchivedThreads.Length} existing archived threads for board /{board}/");

                        int count = 0;

                        foreach (ulong nonExistingThreadId in archiveRequest.ThreadIds.Except(existingArchivedThreads))
                        {
                            threadQueue.Add((board, nonExistingThreadId));
                            count++;
                        }

                        Program.Log($"Enqueued {count} threads from board archive /{board}/");
                        break;

                    case YotsubaResponseType.NotModified:
                        break;

                    case YotsubaResponseType.NotFound:
                    default:
                        Program.Log($"Unable to index the archive of board /{board}/, is there a connection error?");
                        break;
                }
            }

            // Only advance the check time when the board listing was actually obtained (or was
            // reported unmodified). Advancing it after a failed request would make the next
            // cycle skip every thread that changed during the failed window.
            if (boardIndexed)
            {
                lastBoardCheckTimes[board] = beforeCheckTime;
            }
        }

        Program.Log($"{threadQueue.Count} threads have been queued total");

        // Started now so the time spent updating threads counts towards the inter-cycle wait.
        var waitTask = Task.Delay(BoardUpdateTimespan, token);

        // Weak references so that finished update tasks can be collected before the cycle ends.
        var weakReferences = new List<WeakReference<Task>>();

        var requeuedThreads = new HashSet<(string board, ulong threadNumber)>();
        // Guards requeuedThreads: it is written from concurrently running update tasks and
        // HashSet<T> is not safe for concurrent writers.
        var requeueLock = new object();

        int completedCount = 0;

        // Interleave boards: first thread of every board, then second of every board, and so on.
        var roundRobinQueue = threadQueue.GroupBy(s => s.board)
                                         .SelectMany(grp => grp.Select((str, idx) => new { Index = idx, Value = str }))
                                         .OrderBy(v => v.Index).ThenBy(v => v.Value.board)
                                         .Select(v => v.Value);

        foreach (var thread in roundRobinQueue)
        {
            if (completedCount % 50 == 0)
            {
                Program.Log($" --> Completed {completedCount} / {threadQueue.Count}. {threadQueue.Count - completedCount} to go");
            }

            await threadSemaphore.WaitAsync();

            weakReferences.Add(new WeakReference<Task>(Task.Run(async () =>
            {
                try
                {
                    bool success = await ThreadUpdateTask(CancellationToken.None, thread.board, thread.threadNumber);

                    if (!success)
                    {
                        lock (requeueLock)
                        {
                            requeuedThreads.Add(thread);
                        }
                    }
                }
                finally
                {
                    // Always hand the slot back; otherwise a throwing update would leak it and
                    // the dispatch loop would eventually block forever on WaitAsync.
                    threadSemaphore.Release();
                }
            })));

            completedCount++;
        }

        // Wait for every update task that is still reachable.
        foreach (var updateTask in weakReferences)
        {
            if (updateTask.TryGetTarget(out var task))
            {
                await task;
            }
        }

        firstRun = false;

        System.Runtime.GCSettings.LargeObjectHeapCompactionMode = System.Runtime.GCLargeObjectHeapCompactionMode.CompactOnce;
        GC.Collect();

        // The next cycle starts with only the threads whose update reported failure.
        threadQueue.Clear();

        lock (requeueLock)
        {
            foreach (var requeuedThread in requeuedThreads)
            {
                threadQueue.Add(requeuedThread);
            }
        }

        await waitTask;
    }
}