/// <summary>
/// Polls a single thread and, when the API reports content, hands the thread to the consumer.
/// </summary>
/// <param name="token">The cancellation token associated with this request.</param>
/// <param name="workerId">Identifier of the calling worker; only used as a prefix on log lines.</param>
/// <param name="board">The board of the thread.</param>
/// <param name="threadNumber">The post number of the thread to poll.</param>
/// <param name="client">The <see cref="HttpClientProxy"/> to use for the poll request.</param>
/// <returns>
/// A <see cref="ThreadUpdateTaskResult"/> carrying the queued image downloads, the resulting
/// <see cref="ThreadUpdateStatus"/> and a post count. Any exception thrown while polling
/// (cancellation included) is logged and turned into a result with <see cref="ThreadUpdateStatus.Error"/>.
/// </returns>
private async Task<ThreadUpdateTaskResult> ThreadUpdateTask(CancellationToken token, string workerId, string board, ulong threadNumber, HttpClientProxy client)
{
    try
    {
        Program.Log($"{workerId,-2}: Polling thread /{board}/{threadNumber}", true);

        var pollResult = await YotsubaApi.GetThread(board, threadNumber, client.Client, null, token);

        token.ThrowIfCancellationRequested();

        var outcome = pollResult.ResponseType;

        // Nothing changed since the last poll: no downloads to queue.
        if (outcome == ResponseType.NotModified)
        {
            return new ThreadUpdateTaskResult(true, new QueuedImageDownload[0], ThreadUpdateStatus.NotModified, pollResult.Data.Posts.Length);
        }

        // The thread no longer exists on the board: stop tracking it.
        if (outcome == ResponseType.NotFound)
        {
            Program.Log($"{workerId,-2}: Thread /{board}/{threadNumber} has been pruned or deleted", true);

            await ThreadConsumer.ThreadUntracked(threadNumber, board, true);

            return new ThreadUpdateTaskResult(true, new QueuedImageDownload[0], ThreadUpdateStatus.Deleted, 0);
        }

        // Any response type other than the three handled ones is unexpected.
        if (outcome != ResponseType.Ok)
        {
            throw new ArgumentOutOfRangeException();
        }

        Program.Log($"{workerId,-2}: Downloading changes from thread /{board}/{threadNumber}", true);

        var queuedImages = await ThreadConsumer.ConsumeThread(pollResult.Data, board);

        bool isArchived = pollResult.Data.OriginalPost.Archived == true;

        if (isArchived)
        {
            Program.Log($"{workerId,-2}: Thread /{board}/{threadNumber} has been archived", true);

            await ThreadConsumer.ThreadUntracked(threadNumber, board, false);
        }

        var status = isArchived ? ThreadUpdateStatus.Archived : ThreadUpdateStatus.Ok;

        return new ThreadUpdateTaskResult(true, queuedImages, status, pollResult.Data.Posts.Length);
    }
    catch (Exception exception)
    {
        // Failures are reported through the result instead of being rethrown.
        Program.Log($"ERROR: Could not poll or update thread /{board}/{threadNumber}. Will try again next board update\nClient name: {client.Name}\nException: {exception}");

        return new ThreadUpdateTaskResult(false, new QueuedImageDownload[0], ThreadUpdateStatus.Error, 0);
    }
}
/// <summary>
/// Waits on <c>APISemaphore</c>, polls a thread, and passes it to the consumer when the API returns content.
/// </summary>
/// <param name="token">The cancellation token associated with this request.</param>
/// <param name="board">The board of the thread.</param>
/// <param name="threadNumber">The post number of the thread to poll.</param>
/// <returns>
/// <c>true</c> when the poll was handled (updated, not modified, or not found);
/// <c>false</c> when any exception was thrown, after logging it.
/// </returns>
private async Task<bool> ThreadUpdateTask(CancellationToken token, string board, ulong threadNumber)
{
    try
    {
        // The semaphore is not released here; NOTE(review): presumably replenished elsewhere — confirm.
        await APISemaphore.WaitAsync(token);

        Program.Log($"Polling thread /{board}/{threadNumber}");

        var pollResult = await YotsubaApi.GetThread(board, threadNumber, null, token);

        switch (pollResult.ResponseType)
        {
            case YotsubaResponseType.NotModified:
                // Nothing to do for an unchanged thread.
                break;

            case YotsubaResponseType.NotFound:
                Program.Log($"Thread /{board}/{threadNumber} has been pruned or deleted");
                await ThreadConsumer.ThreadUntracked(threadNumber, board, true);
                break;

            case YotsubaResponseType.Ok:
                Program.Log($"Downloading changes from thread /{board}/{threadNumber}");
                await ThreadConsumer.ConsumeThread(pollResult.Thread, board);

                bool isArchived = pollResult.Thread.OriginalPost.Archived == true;

                if (isArchived)
                {
                    Program.Log($"Thread /{board}/{threadNumber} has been archived");
                    await ThreadConsumer.ThreadUntracked(threadNumber, board, false);
                }

                break;

            default:
                throw new ArgumentOutOfRangeException();
        }

        // Every handled response type counts as a successful poll.
        return true;
    }
    catch (Exception exception)
    {
        Program.Log($"ERROR: Could not poll or update thread /{board}/{threadNumber}. Will try again next board update\nException: {exception}");

        return false;
    }
}
/// <summary>
/// Polls a thread and passes it to the consumer when the API returns content.
/// </summary>
/// <param name="token">The cancellation token associated with this request.</param>
/// <param name="board">The board of the thread.</param>
/// <param name="threadNumber">The post number of the thread to poll.</param>
/// <param name="client">The <see cref="HttpClientProxy"/> to use for the poll request.</param>
/// <returns>
/// <c>(true, downloads)</c> when the poll was handled, where <c>downloads</c> is empty unless the thread
/// was consumed; <c>(false, null)</c> when any exception was thrown, after logging it.
/// </returns>
private async Task<(bool success, IList<QueuedImageDownload> imageDownloads)> ThreadUpdateTask(CancellationToken token, string board, ulong threadNumber, HttpClientProxy client)
{
    try
    {
        Program.Log($"Polling thread /{board}/{threadNumber}");

        var apiResponse = await YotsubaApi.GetThread(board, threadNumber, client.Client, null, token);

        IList<QueuedImageDownload> downloads;

        switch (apiResponse.ResponseType)
        {
            case ResponseType.NotModified:
                downloads = new QueuedImageDownload[0];
                break;

            case ResponseType.NotFound:
                Program.Log($"Thread /{board}/{threadNumber} has been pruned or deleted");
                await ThreadConsumer.ThreadUntracked(threadNumber, board, true);
                downloads = new QueuedImageDownload[0];
                break;

            case ResponseType.Ok:
                Program.Log($"Downloading changes from thread /{board}/{threadNumber}");
                downloads = await ThreadConsumer.ConsumeThread(apiResponse.Data, board);

                if (apiResponse.Data.OriginalPost.Archived == true)
                {
                    Program.Log($"Thread /{board}/{threadNumber} has been archived");
                    await ThreadConsumer.ThreadUntracked(threadNumber, board, false);
                }

                break;

            default:
                throw new ArgumentOutOfRangeException();
        }

        return (true, downloads);
    }
    catch (Exception exception)
    {
        Program.Log($"ERROR: Could not poll or update thread /{board}/{threadNumber}. Will try again next board update\nClient name: {client.Name}\nException: {exception}");

        // Callers receive a null download list on failure.
        return (false, null);
    }
}
/// <summary>
/// Requests the archive listing of a board and returns pointers to the archived threads
/// that <c>ThreadConsumer.CheckExistingThreads</c> does not report as already existing.
/// </summary>
/// <param name="token">Token to cancel the request.</param>
/// <param name="board">The board to retrieve threads from.</param>
/// <param name="lastDateTimeCheck">Timestamp forwarded to <c>YotsubaApi.GetArchive</c>.</param>
/// <returns>A list of thread pointers; empty when the archive was not modified or could not be indexed.</returns>
private async Task<IList<ThreadPointer>> GetArchivedBoardThreads(CancellationToken token, string board, DateTimeOffset lastDateTimeCheck)
{
    // Started up front so the cooldown runs while the request is being processed.
    var cooldownTask = Task.Delay(ApiCooldownTimespan, token);

    // NOTE(review): 12 is presumably the retry count of the policy — confirm against NetworkPolicies.
    var archiveRequest = await NetworkPolicies.GenericRetryPolicy<ApiResponse<ulong[]>>(12).ExecuteAsync(async () =>
    {
        token.ThrowIfCancellationRequested();

        await using var boardClient = await ProxyProvider.RentHttpClient();

        return await YotsubaApi.GetArchive(board, boardClient.Object.Client, lastDateTimeCheck, token);
    });

    var threadQueue = new List<ThreadPointer>();
    var outcome = archiveRequest.ResponseType;

    if (outcome == ResponseType.Ok)
    {
        var existingArchivedThreads = await ThreadConsumer.CheckExistingThreads(archiveRequest.Data, board, true, false);

        Program.Log($"Found {existingArchivedThreads.Count} existing archived threads for board /{board}/");

        // Only enqueue archive entries that are not already known.
        var knownIds = existingArchivedThreads.Select(x => x.threadId);

        threadQueue.AddRange(archiveRequest.Data.Except(knownIds).Select(id => new ThreadPointer(board, id)));

        Program.Log($"Enqueued {threadQueue.Count} threads from board archive /{board}/");
    }
    else if (outcome != ResponseType.NotModified)
    {
        // NotFound and any unrecognised response type are reported the same way.
        Program.Log($"Unable to index the archive of board /{board}/, is there a connection error?");
    }

    await cooldownTask;

    return threadQueue;
}
/// <summary>
/// Requests the thread listing of a board and returns pointers to the threads whose
/// <c>LastModified</c> value is greater than the timestamp of the previous check.
/// </summary>
/// <param name="token">Token to cancel the request.</param>
/// <param name="board">The board to retrieve threads from.</param>
/// <param name="lastDateTimeCheck">The time to compare each thread's <c>LastModified</c> value to (ignored when <paramref name="firstRun"/> is true).</param>
/// <param name="firstRun">
/// True on the first cycle of the archival loop. In that case <c>ThreadConsumer.CheckExistingThreads</c> is called,
/// threads whose <c>LastModified</c> is not past their stored last post time are dropped, and the comparison timestamp is 0.
/// </param>
/// <returns>A list of thread pointers; empty when the board was not modified or could not be indexed.</returns>
public async Task<IList<ThreadPointer>> GetBoardThreads(CancellationToken token, string board, DateTimeOffset lastDateTimeCheck, bool firstRun)
{
    // Started up front so the cooldown runs while the request is being processed.
    var cooldownTask = Task.Delay(ApiCooldownTimespan, token);

    // NOTE(review): 12 is presumably the retry count of the policy — confirm against NetworkPolicies.
    var pagesRequest = await NetworkPolicies.GenericRetryPolicy<ApiResponse<Page[]>>(12).ExecuteAsync(async () =>
    {
        Program.Log($"Requesting threads from board /{board}/...");

        await using var boardClient = await ProxyProvider.RentHttpClient();

        return await YotsubaApi.GetBoard(board, boardClient.Object.Client, lastDateTimeCheck, token);
    });

    var threads = new List<ThreadPointer>();
    var outcome = pagesRequest.ResponseType;

    if (outcome == ResponseType.Ok)
    {
        var threadList = pagesRequest.Data.SelectMany(page => page.Threads).ToList();

        if (firstRun)
        {
            var knownThreads = await ThreadConsumer.CheckExistingThreads(threadList.Select(t => t.ThreadNumber), board, false, true);

            foreach (var known in knownThreads)
            {
                // First() throws if the consumer reports a thread that is not in the listing.
                var match = threadList.First(t => t.ThreadNumber == known.threadId);

                bool nothingNew = match.LastModified <= Utility.GetGMTTimestamp(known.lastPostTime);

                if (nothingNew)
                {
                    threadList.Remove(match);
                }
            }
        }

        // On the first run every remaining thread passes the filter.
        uint lastCheckTimestamp = 0;

        if (!firstRun)
        {
            lastCheckTimestamp = Utility.GetGMTTimestamp(lastDateTimeCheck);
        }

        threads.AddRange(threadList
            .Where(t => t.LastModified > lastCheckTimestamp)
            .Select(t => new ThreadPointer(board, t.ThreadNumber)));

        Program.Log($"Enqueued {threads.Count} threads from board /{board}/ past timestamp {lastCheckTimestamp}");
    }
    else if (outcome != ResponseType.NotModified)
    {
        // NotFound and any unrecognised response type are reported the same way.
        Program.Log($"Unable to index board /{board}/, is there a connection error?");
    }

    await cooldownTask;

    return threads;
}
/// <summary>
/// Runs the archival loop until cancellation is requested. Each cycle indexes every configured board
/// (and, on the first cycle only, each board's archive), polls all queued threads with at most 20
/// polls in flight, and carries threads whose poll failed over to the next cycle.
/// </summary>
/// <param name="token">Token that stops the loop; it is also passed to the board/archive requests and the inter-cycle delay.</param>
/// <returns>A task that completes when the loop exits.</returns>
public async Task Execute(CancellationToken token)
{
    // NOTE(review): presumably replenishes APISemaphore in the background — defined elsewhere, confirm.
    var semaphoreTask = SemaphoreUpdateTask(token);

    // Caps the number of thread polls running at the same time.
    var threadSemaphore = new SemaphoreSlim(20);

    bool firstRun = true;

    HashSet<(string board, ulong threadNumber)> threadQueue = new HashSet<(string board, ulong threadNumber)>();

    SortedList<string, DateTimeOffset> lastBoardCheckTimes = new SortedList<string, DateTimeOffset>(Config.Boards.Length);

    while (!token.IsCancellationRequested)
    {
        foreach (string board in Config.Boards)
        {
            if (!lastBoardCheckTimes.TryGetValue(board, out DateTimeOffset lastDateTimeCheck))
            {
                lastDateTimeCheck = DateTimeOffset.MinValue;
            }

            // On the first cycle every thread on the board passes the timestamp filter.
            uint lastCheckTimestamp = firstRun ? 0 : Utility.GetNewYorkTimestamp(lastDateTimeCheck);

            // Captured before the request so changes made while it is in flight are seen next cycle.
            DateTimeOffset beforeCheckTime = DateTimeOffset.Now;

            await APISemaphore.WaitAsync(token);

            var pagesRequest = await YotsubaApi.GetBoard(board, lastDateTimeCheck, token);

            switch (pagesRequest.ResponseType)
            {
                case YotsubaResponseType.Ok:

                    int newCount = 0;

                    foreach (var thread in pagesRequest.Pages.SelectMany(x => x.Threads).ToArray())
                    {
                        if (thread.LastModified > lastCheckTimestamp)
                        {
                            threadQueue.Add((board, thread.ThreadNumber));
                            newCount++;
                        }
                    }

                    Program.Log($"Enqueued {newCount} threads from board /{board}/ past timestamp {lastCheckTimestamp}");

                    break;

                case YotsubaResponseType.NotModified:
                    break;

                case YotsubaResponseType.NotFound:
                default:
                    Program.Log($"Unable to index board /{board}/, is there a connection error?");
                    break;
            }

            if (firstRun)
            {
                // The archive is only indexed once, on the first cycle.
                await APISemaphore.WaitAsync(token);

                var archiveRequest = await YotsubaApi.GetArchive(board, lastDateTimeCheck, token);

                switch (archiveRequest.ResponseType)
                {
                    case YotsubaResponseType.Ok:

                        var existingArchivedThreads = await ThreadConsumer.CheckExistingThreads(archiveRequest.ThreadIds, board, true);

                        Program.Log($"Found {existingArchivedThreads.Length} existing archived threads for board /{board}/");

                        int count = 0;

                        foreach (ulong nonExistingThreadId in archiveRequest.ThreadIds.Except(existingArchivedThreads))
                        {
                            threadQueue.Add((board, nonExistingThreadId));
                            count++;
                        }

                        Program.Log($"Enqueued {count} threads from board archive /{board}/");

                        break;

                    case YotsubaResponseType.NotModified:
                        break;

                    case YotsubaResponseType.NotFound:
                    default:
                        Program.Log($"Unable to index the archive of board /{board}/, is there a connection error?");
                        break;
                }
            }

            lastBoardCheckTimes[board] = beforeCheckTime;
        }

        Program.Log($"{threadQueue.Count} threads have been queued total");

        // Started before polling so the wait between cycles overlaps with the polling work.
        var waitTask = Task.Delay(BoardUpdateTimespan, token);

        var weakReferences = new List<WeakReference<Task>>();

        // Written to by concurrently running poll tasks; every access goes through lock (requeuedThreads).
        var requeuedThreads = new HashSet<(string board, ulong threadNumber)>();

        int completedCount = 0;

        // Interleave the boards: first thread of each board, then the second of each, and so on.
        var roundRobinQueue = threadQueue.GroupBy(s => s.board)
                                         .SelectMany(grp => grp.Select((str, idx) => new { Index = idx, Value = str }))
                                         .OrderBy(v => v.Index).ThenBy(v => v.Value.board)
                                         .Select(v => v.Value);

        foreach (var thread in roundRobinQueue)
        {
            // completedCount counts polls that have been scheduled, not polls that have finished.
            if (completedCount % 50 == 0)
            {
                Program.Log($" --> Completed {completedCount} / {threadQueue.Count}. {threadQueue.Count - completedCount} to go");
            }

            await threadSemaphore.WaitAsync();

            weakReferences.Add(new WeakReference<Task>(Task.Run(async () =>
            {
                try
                {
                    // CancellationToken.None: a poll that has been started is not cancelled by the loop token.
                    bool success = await ThreadUpdateTask(CancellationToken.None, thread.board, thread.threadNumber);

                    if (!success)
                    {
                        // HashSet<T> is not safe for concurrent writers, and several poll tasks run at once.
                        lock (requeuedThreads)
                        {
                            requeuedThreads.Add(thread);
                        }
                    }
                }
                finally
                {
                    // Always hand the slot back; otherwise an unexpected exception would
                    // permanently reduce the available concurrency and could stall the loop.
                    threadSemaphore.Release();
                }
            })));

            completedCount++;
        }

        foreach (var updateTask in weakReferences)
        {
            // NOTE(review): a target that can no longer be retrieved is assumed to have finished already.
            if (updateTask.TryGetTarget(out var task))
            {
                await task;
            }
        }

        firstRun = false;

        System.Runtime.GCSettings.LargeObjectHeapCompactionMode = System.Runtime.GCLargeObjectHeapCompactionMode.CompactOnce;
        GC.Collect();

        threadQueue.Clear();

        // Threads whose poll failed are retried on the next cycle.
        lock (requeuedThreads)
        {
            foreach (var requeuedThread in requeuedThreads)
            {
                threadQueue.Add(requeuedThread);
            }
        }

        await waitTask;
    }
}