Example #1
0
        /// <summary>
        /// Performs the main archival loop.
        /// </summary>
        /// <remarks>
        /// Each iteration polls every configured board for threads, scrapes the queued threads through
        /// rented proxy clients, downloads the media those threads reference, and then waits for the
        /// delay of <c>BoardUpdateTimespan</c> (started before scraping begins) to elapse.
        /// Threads and images that fail are carried over into the next iteration.
        /// </remarks>
        /// <param name="token">Token to safely cancel the execution.</param>
        /// <exception cref="OperationCanceledException">
        /// Thrown by the inter-iteration delay when <paramref name="token"/> is cancelled while waiting.
        /// </exception>
        public async Task Execute(CancellationToken token)
        {
            bool firstRun = true;

            var threadQueue    = new List<ThreadPointer>();
            var enqueuedImages = new Queue<QueuedImageDownload>();
            var requeuedImages = new List<QueuedImageDownload>();

            var lastBoardCheckTimes = new SortedList<string, DateTimeOffset>(Config.Boards.Length);

            // Serialises persistence of the image queue. A SemaphoreSlim is used instead of a monitor because
            // a monitor is thread-affine: it must not be held across an await, since the continuation may
            // resume on a different thread and Monitor.Exit would then throw.
            var queueWriteLock = new SemaphoreSlim(1, 1);

            while (!token.IsCancellationRequested)
            {
                // Poll up to 4 boards concurrently for threads changed since the last check of that board.
                await Config.Boards.ForEachAsync(4, async board =>
                {
                    DateTimeOffset lastDateTimeCheck;

                    lock (lastBoardCheckTimes)
                    {
                        if (!lastBoardCheckTimes.TryGetValue(board, out lastDateTimeCheck))
                        {
                            lastDateTimeCheck = DateTimeOffset.MinValue;
                        }
                    }

                    // Captured before the request so that changes made while polling are picked up next time.
                    DateTimeOffset beforeCheckTime = DateTimeOffset.Now;

                    var threads = await GetBoardThreads(token, board, lastDateTimeCheck, firstRun);

                    lock (threadQueue)
                    {
                        threadQueue.AddRange(threads);
                    }

                    if (firstRun)
                    {
                        var archivedThreads = await GetArchivedBoardThreads(token, board, lastDateTimeCheck);

                        lock (threadQueue)
                        {
                            threadQueue.AddRange(archivedThreads);
                        }
                    }

                    lock (lastBoardCheckTimes)
                    {
                        lastBoardCheckTimes[board] = beforeCheckTime;
                    }
                });

                // Threads requeued from the previous iteration may have been reported again by the boards.
                threadQueue = threadQueue.Distinct().ToList();

                Program.Log($"{threadQueue.Count} threads have been queued total");
                threadQueue.TrimExcess();

                // Started now so the scraping time counts towards the update interval.
                var waitTask = Task.Delay(BoardUpdateTimespan, token);

                var threadTasks     = new Queue<WeakReference<Task>>();
                var requeuedThreads = new List<ThreadPointer>();

                // Runs 'action' on the thread pool with a rented proxy client, enforcing the API cooldown
                // per rented client, and records the task so it can be awaited at the end of the iteration.
                void QueueProxyCall(Func<HttpClientProxy, Task> action)
                {
                    var task = Task.Run(async () =>
                    {
                        await using var client = await ProxyProvider.RentHttpClient();

                        var threadWaitTask = Task.Delay(ApiCooldownTimespan);

                        try
                        {
                            await action(client.Object);
                        }
                        catch (Exception ex)
                        {
                            Program.Log($"ERROR: Network operation failed, and was unhandled. Inconsistencies may arise in continued use of program\r\n" + ex.ToString());
                        }

                        await threadWaitTask;
                    });

                    lock (threadTasks)
                    {
                        threadTasks.Enqueue(new WeakReference<Task>(task));
                    }
                }

                int threadCompletedCount = 0;
                int imageCompletedCount  = 0;

                // Dequeues and downloads a single image. Returns false when the queue was empty (nothing was
                // attempted), true otherwise. Failed downloads are remembered for the next iteration.
                async Task<bool> DownloadEnqueuedImage(HttpClientProxy client)
                {
                    QueuedImageDownload queuedDownload;

                    lock (enqueuedImages)
                    {
                        if (!enqueuedImages.TryDequeue(out queuedDownload))
                        {
                            return false;
                        }
                    }

                    if (File.Exists(queuedDownload.DownloadPath))
                    {
                        Interlocked.Increment(ref imageCompletedCount);
                        return true;
                    }

                    await Task.Delay(100); // Wait 100ms because we're nice people

                    try
                    {
                        await DownloadFileTask(queuedDownload.DownloadUri, queuedDownload.DownloadPath, client.Client);
                    }
                    catch (Exception ex)
                    {
                        Program.Log($"ERROR: Could not download image. Will try again next board update\nClient name: {client.Name}\nException: {ex}");

                        lock (requeuedImages)
                        {
                            requeuedImages.Add(queuedDownload);
                        }
                    }

                    Interlocked.Increment(ref imageCompletedCount);
                    return true;
                }

                enqueuedImages.Clear();

                if (firstRun)
                {
                    foreach (var queuedImage in await StateStore.GetDownloadQueue())
                    {
                        enqueuedImages.Enqueue(queuedImage);
                    }

                    Program.Log($"{enqueuedImages.Count} media items loaded from queue cache");
                }

                foreach (var queuedImage in requeuedImages)
                {
                    enqueuedImages.Enqueue(queuedImage);
                }

                requeuedImages.Clear();

                // Caps the number of thread scrapes in flight at once.
                var threadSemaphore = new SemaphoreSlim(20);

                foreach (var thread in threadQueue.RoundRobin(x => x.Board))
                {
                    if (token.IsCancellationRequested)
                    {
                        break;
                    }

                    await threadSemaphore.WaitAsync();

                    QueueProxyCall(async client =>
                    {
                        try
                        {
                            if (token.IsCancellationRequested)
                            {
                                return;
                            }

                            (bool success, IList<QueuedImageDownload> imageDownloads)
                                = await ThreadUpdateTask(CancellationToken.None, thread.Board, thread.ThreadId, client);

                            int newCompletedCount = Interlocked.Increment(ref threadCompletedCount);

                            if (newCompletedCount % 50 == 0)
                            {
                                Program.Log($" --> Completed {threadCompletedCount} / {threadQueue.Count} : {threadQueue.Count - threadCompletedCount} to go");

                                lock (enqueuedImages)
                                {
                                    Program.Log($" --> {enqueuedImages.Count} in image queue");
                                }
                            }

                            if (!success)
                            {
                                lock (requeuedThreads)
                                {
                                    requeuedThreads.Add(thread);
                                }

                                return;
                            }

                            // Persist the queue one writer at a time. The snapshot is taken under the short
                            // synchronous lock; the asynchronous write happens outside of it.
                            await queueWriteLock.WaitAsync();

                            try
                            {
                                QueuedImageDownload[] queueSnapshot;

                                lock (enqueuedImages)
                                {
                                    foreach (var imageDownload in imageDownloads)
                                    {
                                        enqueuedImages.Enqueue(imageDownload);
                                    }

                                    queueSnapshot = enqueuedImages.ToArray();
                                }

                                await StateStore.WriteDownloadQueue(queueSnapshot);
                            }
                            finally
                            {
                                queueWriteLock.Release();
                            }

                            // Perform up to 100 image downloads on this rented client.
                            for (int i = 0; i < 100; i++)
                            {
                                if (token.IsCancellationRequested)
                                {
                                    break;
                                }

                                if (!await DownloadEnqueuedImage(client))
                                {
                                    break;
                                }
                            }
                        }
                        finally
                        {
                            // Always hand the slot back; a leaked slot would eventually stall the
                            // WaitAsync() in the scheduling loop above.
                            threadSemaphore.Release();
                        }
                    });
                }

                // Queue a download task to download all remaining images.
                // DownloadEnqueuedImage performs the dequeue itself, so nothing is dequeued here.
                // NOTE(review): this task can observe an empty queue while thread tasks are still running and
                // about to enqueue more images; anything left over is dropped by the Clear() at the start
                // of the next iteration. Confirm whether that is intended.
                QueueProxyCall(async client =>
                {
                    while (!token.IsCancellationRequested)
                    {
                        if (!await DownloadEnqueuedImage(client))
                        {
                            break;
                        }
                    }
                });

                // Wait for all currently running/enqueued thread download tasks
                while (true)
                {
                    WeakReference<Task> remainingTask;

                    lock (threadTasks)
                    {
                        if (!threadTasks.TryDequeue(out remainingTask))
                        {
                            break;
                        }
                    }

                    if (remainingTask.TryGetTarget(out var task))
                    {
                        await task;
                    }
                }

                Program.Log($" --> Completed {threadCompletedCount} / {threadQueue.Count} : Waiting for next board update interval");

                firstRun = false;

                // A bit overkill but force a compacting GC collect here to make sure that the heap doesn't expand too much over time
                System.Runtime.GCSettings.LargeObjectHeapCompactionMode = System.Runtime.GCLargeObjectHeapCompactionMode.CompactOnce;
                GC.Collect();

                // Only the threads that failed this time are carried over to the next iteration.
                threadQueue.Clear();
                threadQueue.AddRange(requeuedThreads);

                await waitTask;
            }
        }
Example #2
0
        /// <summary>
        /// Performs the main archival loop.
        /// </summary>
        /// <remarks>
        /// Each iteration polls every configured board for threads, then starts one worker per proxy
        /// (<c>ProxyProvider.ProxyCount</c>). Workers alternate between scraping queued threads and downloading
        /// queued images until both queues are exhausted. The method then waits for the delay of
        /// <c>BoardUpdateTimespan</c> (started before the workers run) to elapse.
        /// Threads and images that fail are carried over into the next iteration.
        /// </remarks>
        /// <param name="token">Token to safely cancel the execution.</param>
        /// <exception cref="OperationCanceledException">
        /// Thrown when <paramref name="token"/> is cancelled while polling boards or while awaiting one of the
        /// token-bound delays.
        /// </exception>
        public async Task Execute(CancellationToken token)
        {
            bool firstRun = true;

            // Single shared client used for all image downloads (thread scrapes use rented proxy clients).
            var imageDownloadClient = new HttpClientProxy(ProxyProvider.CreateNewClient(), "baseconnection/image");

            var threadQueue    = new List<ThreadPointer>();
            var enqueuedImages = new ConcurrentQueue<QueuedImageDownload>();
            var requeuedImages = new List<QueuedImageDownload>();

            var lastBoardCheckTimes = new SortedList<string, DateTimeOffset>(Config.Boards.Length);

            while (!token.IsCancellationRequested)
            {
                int currentBoardCount = 0;

                // Poll up to 8 boards concurrently for threads changed since the last check of that board.
                await Config.Boards.ForEachAsync(8, async board =>
                {
                    token.ThrowIfCancellationRequested();

                    DateTimeOffset lastDateTimeCheck;

                    lock (lastBoardCheckTimes)
                    {
                        if (!lastBoardCheckTimes.TryGetValue(board, out lastDateTimeCheck))
                        {
                            lastDateTimeCheck = DateTimeOffset.MinValue;
                        }
                    }

                    // Captured before the request so that changes made while polling are picked up next time.
                    DateTimeOffset beforeCheckTime = DateTimeOffset.Now;

                    var threads = await GetBoardThreads(token, board, lastDateTimeCheck, firstRun);

                    lock (threadQueue)
                    {
                        threadQueue.AddRange(threads);
                    }

                    if (firstRun && Config.ReadArchive)
                    {
                        var archivedThreads = await GetArchivedBoardThreads(token, board, lastDateTimeCheck);

                        lock (threadQueue)
                        {
                            threadQueue.AddRange(archivedThreads);
                        }
                    }

                    lock (lastBoardCheckTimes)
                    {
                        lastBoardCheckTimes[board] = beforeCheckTime;

                        // The same lock also guards the progress counter.
                        if (++currentBoardCount % 5 == 0 || currentBoardCount == Config.Boards.Length)
                        {
                            Program.Log($"{currentBoardCount} / {Config.Boards.Length} boards enqueued");
                        }
                    }
                });

                if (token.IsCancellationRequested)
                {
                    break;
                }

                Program.Log($"{threadQueue.Count} threads have been queued total");
                threadQueue.TrimExcess();

                // Started now so the scraping time counts towards the update interval.
                var waitTask = Task.Delay(BoardUpdateTimespan, token);

                var requeuedThreads = new List<ThreadPointer>();

                // Runs 'action' with a rented proxy client and enforces the API cooldown before the client is
                // returned. Exceptions thrown by 'action' are logged and swallowed.
                async Task AsyncProxyCall(Func<HttpClientProxy, Task> action)
                {
                    await using var client = await ProxyProvider.RentHttpClient();

                    var threadWaitTask = Task.Delay(ApiCooldownTimespan);

                    try
                    {
                        await action(client.Object);
                    }
                    catch (Exception ex)
                    {
                        Program.Log($"ERROR: Network operation failed, and was unhandled. Inconsistencies may arise in continued use of program\r\n" + ex.ToString());
                    }

                    await threadWaitTask;
                }

                int threadCompletedCount = 0;
                int imageCompletedCount  = 0;

                // Downloads 'image', or the next queued image when 'image' is null. Returns the running count
                // of processed images. Failed downloads are remembered for the next iteration.
                async Task<int> DownloadEnqueuedImage(HttpClientProxy client, QueuedImageDownload image)
                {
                    QueuedImageDownload queuedDownload = image;

                    if (image == null && !enqueuedImages.TryDequeue(out queuedDownload))
                    {
                        return imageCompletedCount;
                    }

                    if (File.Exists(queuedDownload.DownloadPath))
                    {
                        return Interlocked.Increment(ref imageCompletedCount);
                    }

                    // Minimum spacing of 50ms per download because we're nice people. The delay runs
                    // concurrently with the download rather than after it.
                    var politenessDelay = Task.Delay(50, token);

                    try
                    {
                        await DownloadFileTask(queuedDownload.DownloadUri, queuedDownload.DownloadPath, client.Client);
                    }
                    catch (Exception ex)
                    {
                        Program.Log($"ERROR: Could not download image. Will try again next board update\nClient name: {client.Name}\nException: {ex}");

                        lock (requeuedImages)
                        {
                            requeuedImages.Add(queuedDownload);
                        }
                    }

                    await politenessDelay;

                    return Interlocked.Increment(ref imageCompletedCount);
                }

                if (firstRun)
                {
                    foreach (var queuedImage in await StateStore.GetDownloadQueue())
                    {
                        enqueuedImages.Enqueue(queuedImage);
                    }

                    Program.Log($"{enqueuedImages.Count} media items loaded from queue cache");
                }

                foreach (var queuedImage in requeuedImages)
                {
                    enqueuedImages.Enqueue(queuedImage);
                }

                requeuedImages.Clear();

                // Shared cursor over the thread queue; every access is guarded by a lock on the enumerator.
                using var roundRobinQueue = threadQueue.RoundRobin(x => x.Board).GetEnumerator();

                var workerStatuses = new ConcurrentDictionary<int, string>();

                // A single worker: repeatedly picks up a thread or an image until neither queue has work left.
                async Task WorkerTask(int id, bool prioritizeImages)
                {
                    var idString = id.ToString();

                    // Downloads one queued image. Returns false when the image queue was empty.
                    async Task<bool> CheckImages()
                    {
                        if (!enqueuedImages.TryDequeue(out var nextImage))
                        {
                            return false;
                        }

                        workerStatuses[id] = $"Downloading image {nextImage.DownloadUri}";

                        int completedCount = await DownloadEnqueuedImage(imageDownloadClient, nextImage);

                        if (completedCount % 10 == 0)
                        {
                            Program.Log($"{"[Image]",-9} [{completedCount}/{enqueuedImages.Count}]");
                        }

                        return true;
                    }

                    // Scrapes the next queued thread. Returns false only when the thread queue is exhausted;
                    // a failed scrape still counts as work done so the worker keeps going.
                    async Task<bool> CheckThreads()
                    {
                        bool          hasNext;
                        ThreadPointer nextThread;

                        lock (roundRobinQueue)
                        {
                            hasNext    = roundRobinQueue.MoveNext();
                            nextThread = roundRobinQueue.Current;
                        }

                        if (!hasNext)
                        {
                            return false;
                        }

                        workerStatuses[id] = $"Scraping thread /{nextThread.Board}/{nextThread.ThreadId}";

                        // Hard upper bound on how long a single thread scrape may take.
                        using var timeoutToken = new CancellationTokenSource(TimeSpan.FromMinutes(2));

                        await AsyncProxyCall(async client =>
                        {
                            var result = await ThreadUpdateTask(timeoutToken.Token, idString, nextThread.Board, nextThread.ThreadId, client);

                            int newCompletedCount = Interlocked.Increment(ref threadCompletedCount);

                            string threadStatus = result.Status switch
                            {
                                ThreadUpdateStatus.Ok          => " ",
                                ThreadUpdateStatus.Archived    => "A",
                                ThreadUpdateStatus.Deleted     => "D",
                                ThreadUpdateStatus.NotModified => "N",
                                ThreadUpdateStatus.Error       => "E",
                                _                              => " ",
                            };

                            // Requeue failed scrapes for the next iteration. The outcome must come from the
                            // scrape result, not from the MoveNext() flag, which is always true here.
                            // NOTE(review): ThreadUpdateStatus.Error is assumed to be the failure signal of
                            // ThreadUpdateTask; confirm against its implementation.
                            if (result.Status == ThreadUpdateStatus.Error)
                            {
                                lock (requeuedThreads)
                                {
                                    requeuedThreads.Add(nextThread);
                                }

                                return;
                            }

                            Program.Log($"{"[Thread]",-9} {$"/{nextThread.Board}/{nextThread.ThreadId}",-17} {threadStatus} {$"+({result.ImageDownloads.Count}/{result.PostCount})",-13} [{enqueuedImages.Count}/{newCompletedCount}/{threadQueue.Count}]");

                            foreach (var imageDownload in result.ImageDownloads)
                            {
                                enqueuedImages.Enqueue(imageDownload);
                            }

                            await StateStore.InsertToDownloadQueue(new ReadOnlyCollection<QueuedImageDownload>(result.ImageDownloads));
                        });

                        return true;
                    }

                    while (true)
                    {
                        workerStatuses[id] = "Idle";

                        if (token.IsCancellationRequested)
                        {
                            break;
                        }

                        // Image-priority workers drain the image queue before looking at threads.
                        if (prioritizeImages && await CheckImages())
                        {
                            continue;
                        }

                        if (await CheckThreads())
                        {
                            continue;
                        }

                        if (await CheckImages())
                        {
                            continue;
                        }

                        // Neither queue had work left.
                        break;
                    }

                    Program.Log($"Worker ID {idString} finished", true);

                    workerStatuses[id] = "Finished";

                    if (Program.HaydenConfig.DebugLogging)
                    {
                        // The lock keeps the status dumps of different workers from interleaving in the log.
                        lock (workerStatuses)
                        {
                            foreach (var kv in workerStatuses)
                            {
                                Program.Log($"ID {kv.Key,-2} => {kv.Value}", true);
                            }
                        }
                    }
                }

                var workerTasks = new List<Task>();

                // Worker IDs are 1-based; every third worker (starting with the first) prioritises images.
                for (int i = 0; i < ProxyProvider.ProxyCount; i++)
                {
                    workerTasks.Add(WorkerTask(i + 1, i % 3 == 0));
                }

                await Task.WhenAll(workerTasks);

                Program.Log($" --> Completed {threadCompletedCount} / {threadQueue.Count} : Waiting for next board update interval");

                enqueuedImages.Clear();
                await StateStore.WriteDownloadQueue(enqueuedImages);

                Program.Log($" --> Cleared queued image cache");

                firstRun = false;

                // A bit overkill but force a compacting GC collect here to make sure that the heap doesn't expand too much over time
                System.Runtime.GCSettings.LargeObjectHeapCompactionMode = System.Runtime.GCLargeObjectHeapCompactionMode.CompactOnce;
                GC.Collect();

                // Only the threads that failed this time are carried over to the next iteration.
                threadQueue.Clear();
                threadQueue.AddRange(requeuedThreads);

                await waitTask;
            }
        }