/// <summary>
/// Queries the archive listing of a board and builds a pointer for every archived thread ID
/// that <c>ThreadConsumer.CheckExistingThreads</c> does not report as already existing.
/// </summary>
/// <param name="token">Token to cancel the request.</param>
/// <param name="board">The board whose archive is queried.</param>
/// <param name="lastDateTimeCheck">The time of the previous check; forwarded unchanged to <c>YotsubaApi.GetArchive</c>.</param>
/// <returns>
/// Pointers to the archived threads that were not found by the existence check.
/// The list is empty when the archive is reported as not modified or the request did not succeed.
/// </returns>
private async Task<IList<ThreadPointer>> GetArchivedBoardThreads(CancellationToken token, string board, DateTimeOffset lastDateTimeCheck)
{
    // The cooldown timer is started up front and only awaited at the very end,
    // so it runs concurrently with the request and processing below.
    Task cooldown = Task.Delay(ApiCooldownTimespan, token);

    var pendingThreads = new List<ThreadPointer>();

    var retryPolicy = NetworkPolicies.GenericRetryPolicy<ApiResponse<ulong[]>>(12);

    var archiveResponse = await retryPolicy.ExecuteAsync(async () =>
    {
        token.ThrowIfCancellationRequested();

        // A client is rented for each attempt and handed back when the attempt finishes.
        await using var rentedClient = await ProxyProvider.RentHttpClient();

        return await YotsubaApi.GetArchive(board, rentedClient.Object.Client, lastDateTimeCheck, token);
    });

    var status = archiveResponse.ResponseType;

    if (status == ResponseType.Ok)
    {
        var knownThreads = await ThreadConsumer.CheckExistingThreads(archiveResponse.Data, board, true, false);

        Program.Log($"Found {knownThreads.Count} existing archived threads for board /{board}/");

        // Keep only the archive IDs that the existence check did not return.
        var knownIds = knownThreads.Select(x => x.threadId);
        var missingIds = archiveResponse.Data.Except(knownIds);

        pendingThreads.AddRange(missingIds.Select(id => new ThreadPointer(board, id)));

        Program.Log($"Enqueued {pendingThreads.Count} threads from board archive /{board}/");
    }
    else if (status != ResponseType.NotModified)
    {
        // NotFound and any other response type end up here.
        Program.Log($"Unable to index the archive of board /{board}/, is there a connection error?");
    }

    await cooldown;

    return pendingThreads;
}
/// <summary>
/// Runs the main polling loop until cancellation is requested.
/// Each iteration indexes every configured board (and, on the first iteration only, each board's archive),
/// queues the threads that need updating, processes the queue with at most 20 thread updates in flight,
/// and then waits for the remainder of <c>BoardUpdateTimespan</c> before starting over.
/// Threads whose update reports failure are carried over into the next iteration's queue.
/// </summary>
/// <param name="token">
/// Token used to stop the loop. It is passed to the API semaphore waits, the board/archive requests and the
/// inter-iteration delay; individual thread updates are started with <see cref="CancellationToken.None"/>.
/// </param>
/// <returns>A task that completes when the loop exits.</returns>
public async Task Execute(CancellationToken token)
{
    // NOTE(review): the returned task is never awaited here; presumably it replenishes APISemaphore
    // in the background (nothing in this method releases it) - confirm against SemaphoreUpdateTask.
    var semaphoreTask = SemaphoreUpdateTask(token);

    // Caps the number of concurrently running thread updates.
    var threadSemaphore = new SemaphoreSlim(20);

    bool firstRun = true;

    var threadQueue = new HashSet<(string board, ulong threadNumber)>();
    var lastBoardCheckTimes = new SortedList<string, DateTimeOffset>(Config.Boards.Length);

    while (!token.IsCancellationRequested)
    {
        foreach (string board in Config.Boards)
        {
            if (!lastBoardCheckTimes.TryGetValue(board, out DateTimeOffset lastDateTimeCheck))
            {
                lastDateTimeCheck = DateTimeOffset.MinValue;
            }

            // On the first run every thread is considered new (timestamp 0).
            uint lastCheckTimestamp = firstRun ? 0 : Utility.GetNewYorkTimestamp(lastDateTimeCheck);

            // Captured before the request so that changes happening during the request are not missed next time.
            DateTimeOffset beforeCheckTime = DateTimeOffset.Now;

            await APISemaphore.WaitAsync(token);

            var pagesRequest = await YotsubaApi.GetBoard(board, lastDateTimeCheck, token);

            switch (pagesRequest.ResponseType)
            {
                case YotsubaResponseType.Ok:

                    int newCount = 0;

                    foreach (var thread in pagesRequest.Pages.SelectMany(x => x.Threads).ToArray())
                    {
                        if (thread.LastModified > lastCheckTimestamp)
                        {
                            threadQueue.Add((board, thread.ThreadNumber));
                            newCount++;
                        }
                    }

                    Program.Log($"Enqueued {newCount} threads from board /{board}/ past timestamp {lastCheckTimestamp}");

                    break;

                case YotsubaResponseType.NotModified:
                    break;

                case YotsubaResponseType.NotFound:
                default:
                    Program.Log($"Unable to index board /{board}/, is there a connection error?");
                    break;
            }

            if (firstRun)
            {
                // The archive is only indexed once, on the first pass over the boards.
                await APISemaphore.WaitAsync(token);

                var archiveRequest = await YotsubaApi.GetArchive(board, lastDateTimeCheck, token);

                switch (archiveRequest.ResponseType)
                {
                    case YotsubaResponseType.Ok:

                        var existingArchivedThreads = await ThreadConsumer.CheckExistingThreads(archiveRequest.ThreadIds, board, true);

                        Program.Log($"Found {existingArchivedThreads.Length} existing archived threads for board /{board}/");

                        int count = 0;

                        // Only archived threads not returned by the existence check are queued.
                        foreach (ulong nonExistingThreadId in archiveRequest.ThreadIds.Except(existingArchivedThreads))
                        {
                            threadQueue.Add((board, nonExistingThreadId));
                            count++;
                        }

                        Program.Log($"Enqueued {count} threads from board archive /{board}/");

                        break;

                    case YotsubaResponseType.NotModified:
                        break;

                    case YotsubaResponseType.NotFound:
                    default:
                        Program.Log($"Unable to index the archive of board /{board}/, is there a connection error?");
                        break;
                }
            }

            lastBoardCheckTimes[board] = beforeCheckTime;
        }

        Program.Log($"{threadQueue.Count} threads have been queued total");

        // Started now so the time spent processing the queue counts towards the wait between iterations.
        var waitTask = Task.Delay(BoardUpdateTimespan, token);

        // NOTE(review): tasks are tracked through weak references, presumably so finished tasks
        // can be garbage-collected while the rest of the queue is still being worked - confirm.
        var weakReferences = new List<WeakReference<Task>>();

        // Written to by concurrently running workers, so every access goes through requeueLock.
        var requeuedThreads = new HashSet<(string board, ulong threadNumber)>();
        var requeueLock = new object();

        int completedCount = 0;

        // Interleave boards: take the 1st thread of every board, then the 2nd of every board, and so on.
        var roundRobinQueue = threadQueue.GroupBy(s => s.board)
                                         .SelectMany(grp => grp.Select((str, idx) => new { Index = idx, Value = str }))
                                         .OrderBy(v => v.Index)
                                         .ThenBy(v => v.Value.board)
                                         .Select(v => v.Value);

        foreach (var thread in roundRobinQueue)
        {
            if (completedCount % 50 == 0)
            {
                Program.Log($" --> Completed {completedCount} / {threadQueue.Count}. {threadQueue.Count - completedCount} to go");
            }

            await threadSemaphore.WaitAsync();

            weakReferences.Add(new WeakReference<Task>(Task.Run(async () =>
            {
                try
                {
                    bool success = await ThreadUpdateTask(CancellationToken.None, thread.board, thread.threadNumber);

                    if (!success)
                    {
                        // HashSet<T> is not safe for concurrent writers.
                        lock (requeueLock)
                        {
                            requeuedThreads.Add(thread);
                        }
                    }
                }
                finally
                {
                    // Always hand the slot back; otherwise a throwing update would leak it
                    // and the dispatch loop would eventually block forever on WaitAsync.
                    threadSemaphore.Release();
                }
            })));

            completedCount++;
        }

        // Wait for every update that is still reachable; a failed update rethrows here.
        foreach (var updateTask in weakReferences)
        {
            if (updateTask.TryGetTarget(out var task))
            {
                await task;
            }
        }

        firstRun = false;

        System.Runtime.GCSettings.LargeObjectHeapCompactionMode = System.Runtime.GCLargeObjectHeapCompactionMode.CompactOnce;
        GC.Collect();

        // Start the next iteration with only the threads that failed this time.
        threadQueue.Clear();

        lock (requeueLock)
        {
            foreach (var requeuedThread in requeuedThreads)
            {
                threadQueue.Add(requeuedThread);
            }
        }

        await waitTask;
    }
}