Esempio n. 1
0
 /// <summary>
 /// Compares this download with <paramref name="other"/>. Two downloads are treated as equal
 /// when the <c>AbsolutePath</c> of their <c>DownloadUri</c> and their <c>DownloadPath</c> both match.
 /// </summary>
 /// <param name="other">The download to compare against. No null guard is applied here.</param>
 /// <returns><c>true</c> when both values match; otherwise <c>false</c>.</returns>
 protected bool Equals(QueuedImageDownload other)
 {
     bool sameRemotePath = DownloadUri.AbsolutePath == other.DownloadUri.AbsolutePath;

     // Bail out early; the local path is only compared once the remote path matched.
     if (!sameRemotePath)
     {
         return false;
     }

     return DownloadPath == other.DownloadPath;
 }
Esempio n. 2
0
        /// <summary>
        /// Performs the main archival loop.
        /// </summary>
        /// <remarks>
        /// Each pass of the loop:
        /// <list type="number">
        /// <item><description>Polls every configured board for threads and collects them in a thread queue.</description></item>
        /// <item><description>Starts one worker per available proxy; workers scrape queued threads and download queued media until both run dry.</description></item>
        /// <item><description>Clears the media queue, writes it to the state store, and carries failed threads/media over to the next pass.</description></item>
        /// <item><description>Waits until <c>BoardUpdateTimespan</c> has elapsed since the workers were started.</description></item>
        /// </list>
        /// Threads whose update reports <c>ThreadUpdateStatus.Error</c> are retried on the next pass.
        /// </remarks>
        /// <param name="token">Token to safely cancel the execution.</param>
        /// <returns>
        /// A task that runs until cancellation is requested. Cancellation may also surface as an
        /// <see cref="OperationCanceledException"/> from <c>ThrowIfCancellationRequested</c> or from the awaited cancellable delays.
        /// </returns>
        public async Task Execute(CancellationToken token)
        {
            // True only during the first pass; gates the archive read and the load of the persisted download queue.
            bool firstRun            = true;
            // One shared client that every worker uses for media downloads.
            var  imageDownloadClient = new HttpClientProxy(ProxyProvider.CreateNewClient(), "baseconnection/image");

            // Threads to scrape during the current pass. Always accessed under lock while boards are being polled.
            List <ThreadPointer> threadQueue = new List <ThreadPointer>();
            // Media waiting to be downloaded.
            ConcurrentQueue <QueuedImageDownload> enqueuedImages = new ConcurrentQueue <QueuedImageDownload>();
            // Media whose download failed; moved back into enqueuedImages at the start of the next pass.
            List <QueuedImageDownload>            requeuedImages = new List <QueuedImageDownload>();

            // Per-board timestamp of the previous poll, handed to GetBoardThreads on the next poll.
            SortedList <string, DateTimeOffset> lastBoardCheckTimes = new SortedList <string, DateTimeOffset>(Config.Boards.Length);

            while (!token.IsCancellationRequested)
            {
                int currentBoardCount = 0;

                // NOTE(review): the first argument (8) is presumably the maximum number of boards polled concurrently - confirm against ForEachAsync.
                await Config.Boards.ForEachAsync(8, async board =>
                {
                    token.ThrowIfCancellationRequested();

                    DateTimeOffset lastDateTimeCheck;

                    lock (lastBoardCheckTimes)
                        if (!lastBoardCheckTimes.TryGetValue(board, out lastDateTimeCheck))
                        {
                            // Board has not been polled yet in this process.
                            lastDateTimeCheck = DateTimeOffset.MinValue;
                        }

                    // Captured before the request and stored as the board's last-check time afterwards,
                    // so the next poll's window starts before this fetch began.
                    DateTimeOffset beforeCheckTime = DateTimeOffset.Now;

                    var threads = await GetBoardThreads(token, board, lastDateTimeCheck, firstRun);

                    lock (threadQueue)
                        threadQueue.AddRange(threads);

                    if (firstRun && Config.ReadArchive)
                    {
                        var archivedThreads = await GetArchivedBoardThreads(token, board, lastDateTimeCheck);

                        lock (threadQueue)
                            threadQueue.AddRange(archivedThreads);
                    }

                    lock (lastBoardCheckTimes)
                    {
                        lastBoardCheckTimes[board] = beforeCheckTime;

                        // Progress report every fifth board and once more when the last board is done.
                        if (++currentBoardCount % 5 == 0 || currentBoardCount == Config.Boards.Length)
                        {
                            Program.Log($"{currentBoardCount} / {Config.Boards.Length} boards enqueued");
                        }
                    }
                });

                if (token.IsCancellationRequested)
                {
                    break;
                }

                Program.Log($"{threadQueue.Count} threads have been queued total");
                threadQueue.TrimExcess();

                // Started now and awaited at the very end of the pass, so the scraping time counts towards the interval.
                var waitTask = Task.Delay(BoardUpdateTimespan, token);


                // Threads whose update failed during this pass; they seed the thread queue of the next pass.
                var requeuedThreads = new List <ThreadPointer>();

                // Runs an action with a rented proxy client. Exceptions are logged, not propagated, and the
                // call does not return before ApiCooldownTimespan has elapsed since it started.
                async Task AsyncProxyCall(Func <HttpClientProxy, Task> action)
                {
                    await using var client = await ProxyProvider.RentHttpClient();

                    var threadWaitTask = Task.Delay(ApiCooldownTimespan);

                    try
                    {
                        await action(client.Object);
                    }
                    catch (Exception ex)
                    {
                        Program.Log($"ERROR: Network operation failed, and was unhandled. Inconsistencies may arise in continued use of program\r\n" + ex.ToString());
                    }

                    await threadWaitTask;
                }

                int threadCompletedCount = 0;
                int imageCompletedCount  = 0;

                // Downloads a single media item (or dequeues one when image is null) and returns the
                // running count of processed items. Failed downloads are queued for a retry on the next pass.
                async Task <int> DownloadEnqueuedImage(HttpClientProxy client, QueuedImageDownload image)
                {
                    QueuedImageDownload queuedDownload = image;

                    if (image == null)
                    {
                        if (!enqueuedImages.TryDequeue(out queuedDownload))
                        {
                            // Nothing to do; report the current count unchanged.
                            return(imageCompletedCount);
                        }
                    }

                    if (File.Exists(queuedDownload.DownloadPath))
                    {
                        // Already on disk: count it as processed without downloading again.
                        return(Interlocked.Increment(ref imageCompletedCount));
                    }

                    var waitTask = Task.Delay(50, token);                     // Wait at least 50ms per download because we're nice people

                    try
                    {
                        await DownloadFileTask(queuedDownload.DownloadUri, queuedDownload.DownloadPath, client.Client);
                    }
                    catch (Exception ex)
                    {
                        Program.Log($"ERROR: Could not download image. Will try again next board update\nClient name: {client.Name}\nException: {ex}");

                        lock (requeuedImages)
                            requeuedImages.Add(queuedDownload);
                    }

                    await waitTask;

                    // Failed downloads are counted as processed too; they are retried via requeuedImages.
                    return(Interlocked.Increment(ref imageCompletedCount));
                }

                if (firstRun)
                {
                    // Resume media downloads that were persisted by the state store.
                    foreach (var queuedImage in await StateStore.GetDownloadQueue())
                    {
                        enqueuedImages.Enqueue(queuedImage);
                    }

                    Program.Log($"{enqueuedImages.Count} media items loaded from queue cache");
                }

                // Give downloads that failed during the previous pass another chance.
                foreach (var queuedImage in requeuedImages)
                {
                    enqueuedImages.Enqueue(queuedImage);
                }

                requeuedImages.Clear();

                // Single enumerator shared by all workers; every access goes through lock (roundRobinQueue).
                using var roundRobinQueue = threadQueue.RoundRobin(x => x.Board).GetEnumerator();

                // Last known activity of each worker, keyed by worker id (dumped when debug logging is enabled).
                IDictionary <int, string> WorkerStatuses = new ConcurrentDictionary <int, string>();

                // A worker alternates between scraping threads and downloading media until neither is left.
                // Workers started with prioritizeImages look at the media queue first.
                async Task WorkerTask(int id, bool prioritizeImages)
                {
                    var idString = id.ToString();

                    // Tries to download one queued media item; returns false when the queue is empty.
                    async Task <bool> CheckImages()
                    {
                        bool success = enqueuedImages.TryDequeue(out var nextImage);

                        if (success)
                        {
                            WorkerStatuses[id] = $"Downloading image {nextImage.DownloadUri}";

                            int completedCount = await DownloadEnqueuedImage(imageDownloadClient, nextImage);

                            if (completedCount % 10 == 0)
                            {
                                Program.Log($"{"[Image]",-9} [{completedCount}/{enqueuedImages.Count}]");
                            }
                        }

                        return(success);
                    }

                    // Tries to scrape the next queued thread. Returns false when no thread is left
                    // or when the update reported an error (the thread is then retried on the next pass).
                    async Task <bool> CheckThreads()
                    {
                        bool          success = false;
                        ThreadPointer nextThread;

                        lock (roundRobinQueue)
                        {
                            success    = roundRobinQueue.MoveNext();
                            nextThread = roundRobinQueue.Current;
                        }

                        if (!success)
                        {
                            return(false);
                        }

                        WorkerStatuses[id] = $"Scraping thread /{nextThread.Board}/{nextThread.ThreadId}";

                        bool outerSuccess = true;

                        // Independent two minute timeout handed to the thread update.
                        using var timeoutToken = new CancellationTokenSource(TimeSpan.FromMinutes(2));

                        await AsyncProxyCall(async client =>
                        {
                            var result = await ThreadUpdateTask(timeoutToken.Token, idString, nextThread.Board, nextThread.ThreadId, client);

                            int newCompletedCount = Interlocked.Increment(ref threadCompletedCount);

                            // One-character status marker for the log line; unknown statuses print a blank.
                            string threadStatus = result.Status switch
                            {
                                ThreadUpdateStatus.Ok          => " ",
                                ThreadUpdateStatus.Archived    => "A",
                                ThreadUpdateStatus.Deleted     => "D",
                                ThreadUpdateStatus.NotModified => "N",
                                ThreadUpdateStatus.Error       => "E",
                                _                              => " "
                            };

                            // Logged before the error check so that failed threads still show up with their "E" marker.
                            Program.Log($"{"[Thread]",-9} {$"/{nextThread.Board}/{nextThread.ThreadId}",-17} {threadStatus} {$"+({result.ImageDownloads.Count}/{result.PostCount})",-13} [{enqueuedImages.Count}/{newCompletedCount}/{threadQueue.Count}]");

                            // This used to test the captured MoveNext() result, which is always true at this
                            // point, so failed threads were never requeued. Test the update result instead.
                            if (result.Status == ThreadUpdateStatus.Error)
                            {
                                lock (requeuedThreads)
                                    requeuedThreads.Add(nextThread);

                                outerSuccess = false;
                                return;
                            }

                            foreach (var imageDownload in result.ImageDownloads)
                            {
                                enqueuedImages.Enqueue(imageDownload);
                            }

                            // Persist the newly discovered media so it can be reloaded from the state store on a first run.
                            await StateStore.InsertToDownloadQueue(new ReadOnlyCollection <QueuedImageDownload>(result.ImageDownloads));
                        });

                        return(outerSuccess);
                    }

                    while (true)
                    {
                        WorkerStatuses[id] = "Idle";

                        if (token.IsCancellationRequested)
                        {
                            break;
                        }

                        if (prioritizeImages)
                        {
                            if (await CheckImages())
                            {
                                continue;
                            }
                        }

                        if (await CheckThreads())
                        {
                            continue;
                        }

                        if (await CheckImages())
                        {
                            continue;
                        }

                        // Neither a thread nor a media item could be processed: this worker is done for the pass.
                        break;
                    }

                    Program.Log($"Worker ID {idString} finished", true);

                    WorkerStatuses[id] = "Finished";

                    if (Program.HaydenConfig.DebugLogging)
                    {
                        lock (WorkerStatuses)
                            foreach (var kv in WorkerStatuses)
                            {
                                Program.Log($"ID {kv.Key,-2} => {kv.Value}", true);
                            }
                    }
                }

                List <Task> workerTasks = new List <Task>();

                int id = 1;

                // One worker per proxy; every third worker (i = 0, 3, 6, ...) prioritizes media downloads.
                for (int i = 0; i < ProxyProvider.ProxyCount; i++)
                {
                    workerTasks.Add(WorkerTask(id++, i % 3 == 0));
                }

                await Task.WhenAll(workerTasks);


                Program.Log($" --> Completed {threadCompletedCount} / {threadQueue.Count} : Waiting for next board update interval");


                // Empty the in-memory queue and write that (now empty) queue to the state store.
                enqueuedImages.Clear();
                await StateStore.WriteDownloadQueue(enqueuedImages);

                Program.Log($" --> Cleared queued image cache");

                firstRun = false;

                // A bit overkill but force a compacting GC collect here to make sure that the heap doesn't expand too much over time
                System.Runtime.GCSettings.LargeObjectHeapCompactionMode = System.Runtime.GCLargeObjectHeapCompactionMode.CompactOnce;
                GC.Collect();


                // Start the next pass with only the threads that have to be retried.
                threadQueue.Clear();

                threadQueue.AddRange(requeuedThreads);

                await waitTask;
            }
        }