Exemple #1
0
        /// <summary>
        /// Dumps posts into <paramref name="path"/>. In a loop it fetches a batch of post metadata
        /// starting at <paramref name="startId"/>, decides per post whether the image and/or the
        /// local metadata file must be (re)written, downloads images through the "_temp" folder into
        /// the "images" folder, and hands the still-valid posts to
        /// <c>SQLiteUtility.InsertOrReplace</c>. The loop ends when a fetch returns no posts.
        /// </summary>
        /// <param name="path">Root output folder. The "_temp" and "images" sub-folders and the "danbooru.sqlite" database live below it.</param>
        /// <param name="startId">Id passed to the first metadata request; after each batch it is set to the id of the batch's last post plus one.</param>
        /// <param name="ignoreHashCheck">When true, an already existing image is only re-hashed if the post's metadata is new or newer than the cached metadata file.</param>
        /// <param name="includeDeleted">When false, posts flagged as deleted are skipped.</param>
        /// <returns>A task that completes once the dump loop has finished and the temp folder removal was attempted.</returns>
        /// <exception cref="InvalidOperationException">The metadata request left no result after the retry helper returned.</exception>
        public static async Task Run(string path, long startId, bool ignoreHashCheck, bool includeDeleted)
        {
            string tempFolderPath       = Path.Combine(path, "_temp");
            string imageFolderPath      = Path.Combine(path, "images");
            string metadataDatabasePath = Path.Combine(path, "danbooru.sqlite");

            PathUtility.CreateDirectoryIfNotExists(path);
            PathUtility.CreateDirectoryIfNotExists(tempFolderPath);
            PathUtility.CreateDirectoryIfNotExists(imageFolderPath);

            using (SqliteConnection connection = new SqliteConnection(new SqliteConnectionStringBuilder
            {
                DataSource = metadataDatabasePath,
            }.ToString()))
            {
                connection.Open();

                SQLiteUtility.TryCreateTable(connection);

                while (true)
                {
                    // Get posts metadata as json
                    JObject[] postJObjects = null;

                    await TaskUtility.RunWithRetry(async() =>
                    {
                        Log.Info($"Downloading metadata ... ({startId} ~ )");
                        postJObjects = await DanbooruUtility.GetPosts(startId);
                    }, e =>
                    {
                        Log.Error(e);
                        return(true);
                    },
                                                   10,
                                                   3000);

                    // NOTE(review): whether RunWithRetry can return without throwing after all retries
                    // failed is not visible here; guard so that case fails with a clear message
                    // instead of a NullReferenceException.
                    if (postJObjects == null)
                    {
                        throw new InvalidOperationException($"Failed to download post metadata starting at Id={startId}.");
                    }

                    if (postJObjects.Length == 0)
                    {
                        Log.Info("There is no posts.");
                        break;
                    }

                    // Validate post
                    Log.Info($"Checking {postJObjects.Length} posts ...");
                    Post[] posts = postJObjects.Select(p => ConvertToPost(p)).Where(p => p != null).ToArray();

                    // Without at least one converted post the next start id cannot be derived
                    // (posts.Last() below would throw), so stop instead of crashing.
                    if (posts.Length == 0)
                    {
                        Log.Warn($"None of the {postJObjects.Length} downloaded posts could be converted (start Id={startId}). Stopping.");
                        break;
                    }

                    // Classify every post: set IsValid and the ShouldSaveMetadata / ShouldUpdateImage /
                    // ShouldDownloadImage flags. Each iteration only mutates its own post object.
                    Parallel.ForEach(posts, post =>
                    {
                        if (string.IsNullOrEmpty(post.Md5))
                        {
                            Log.Debug($"Skip for empty MD5 : Id={post.Id}");
                            return;
                        }

                        if (string.IsNullOrEmpty(post.ImageUrl))
                        {
                            Log.Debug($"Skip for empty image URL : Id={post.Id}");
                            return;
                        }

                        if (post.IsDeleted && !includeDeleted)
                        {
                            return;
                        }

                        if (post.IsPending)
                        {
                            return;
                        }

                        post.IsValid = true;

                        string metadataPath = GetPostLocalMetadataPath(imageFolderPath, post);

                        try
                        {
                            if (File.Exists(metadataPath))
                            {
                                Post cachedPost = ConvertToPost(JObject.Parse(File.ReadAllText(metadataPath)));

                                // Cached metadata is unusable or older than the downloaded one.
                                if (cachedPost == null || post.UpdatedDate > cachedPost.UpdatedDate)
                                {
                                    post.ShouldSaveMetadata = true;
                                    post.ShouldUpdateImage  = true;
                                }
                            }
                            else
                            {
                                post.ShouldSaveMetadata = true;
                                post.ShouldUpdateImage  = true;
                            }
                        }
                        catch (Exception e)
                        {
                            // Reading/parsing the cached metadata failed: rewrite it, but do not
                            // force an image re-check on that basis alone.
                            post.ShouldSaveMetadata = true;
                            Log.Error(e);
                        }

                        string imagePath = GetPostLocalImagePath(imageFolderPath, post);

                        if (!File.Exists(imagePath))
                        {
                            post.ShouldDownloadImage = true;
                            return;
                        }

                        // The image exists: compare hashes when the metadata changed or hash
                        // checking was not disabled by the caller.
                        if (post.ShouldUpdateImage || !ignoreHashCheck)
                        {
                            string cachedImageMd5 = GetMd5Hash(imagePath);

                            if (post.Md5 != cachedImageMd5)
                            {
                                post.ShouldDownloadImage = true;
                                Log.Info($"MD5 is different to cached image : Id={post.Id}, {post.Md5} (new) != {cachedImageMd5} (cached)");
                            }
                        }
                    });

                    int shouldDownloadCount = posts.Count(p => p.ShouldDownloadImage);
                    int shouldUpdateCount   = posts.Count(p => p.ShouldSaveMetadata);
                    int pendingCount        = posts.Count(p => p.IsPending);

                    if (shouldUpdateCount > 0 || shouldDownloadCount > 0)
                    {
                        Log.Info($"{shouldUpdateCount}/{posts.Length} posts are updated. {pendingCount} posts are pending. Downloading {shouldDownloadCount} posts ...");
                    }

                    foreach (Post post in posts)
                    {
                        if (!post.IsValid)
                        {
                            continue;
                        }

                        string metadataPath  = GetPostLocalMetadataPath(imageFolderPath, post);
                        string imagePath     = GetPostLocalImagePath(imageFolderPath, post);
                        string tempImagePath = GetPostTempImagePath(tempFolderPath, post);

                        PathUtility.CreateDirectoryIfNotExists(Path.GetDirectoryName(imagePath));

                        try
                        {
                            await TaskUtility.RunWithRetry(async() =>
                            {
                                if (post.ShouldDownloadImage)
                                {
                                    Log.Info($"Downloading post {post.Id} ...");
                                    await Download(post.ImageUrl, tempImagePath);

                                    string downloadedMd5 = GetMd5Hash(tempImagePath);

                                    if (downloadedMd5 != post.Md5)
                                    {
                                        Log.Warn($"MD5 hash of downloaded image is different : Id={post.Id}, {post.Md5} (metadata) != {downloadedMd5} (downloaded)");

                                        // Best-effort cleanup: a failed delete must neither skip the
                                        // second delete nor hide the MD5 mismatch reported below.
                                        try
                                        {
                                            File.Delete(tempImagePath);
                                        }
                                        catch (Exception deleteError)
                                        {
                                            Log.Warn(deleteError);
                                        }

                                        try
                                        {
                                            File.Delete(metadataPath);
                                        }
                                        catch (Exception deleteError)
                                        {
                                            Log.Warn(deleteError);
                                        }

                                        // Not a NotRetryableException, so the predicate below asks for a retry.
                                        throw new Exception($"MD5 hash of downloaded image does not match metadata : Id={post.Id}");
                                    }

                                    // Replace the existing image with the verified download.
                                    File.Delete(imagePath);
                                    File.Move(tempImagePath, imagePath);
                                }

                                if (post.ShouldDownloadImage || post.ShouldSaveMetadata)
                                {
                                    PathUtility.ChangeFileTimestamp(imagePath, post.CreatedDate, post.UpdatedDate);
                                }

                                if (post.ShouldSaveMetadata)
                                {
                                    File.WriteAllText(metadataPath, post.JObject.ToString());
                                }
                            }, e =>
                            {
                                return(!(e is NotRetryableException));
                            }, 10, 3000);
                        }
                        catch (NotRetryableException)
                        {
                            Log.Error($"Can't retryable exception was occured : Id={post.Id}");
                            post.IsValid = false;
                        }
                    }

                    Log.Info("Updating database ...");
                    SQLiteUtility.InsertOrReplace(connection, posts.Where(p => p.IsValid).Select(p => p.JObject));

                    // NOTE(review): assumes the batch is ordered by ascending id so the last
                    // converted post carries the highest id - confirm against GetPosts.
                    long lastId = long.Parse(posts.Last().Id);

                    startId = lastId + 1;
                }

                try
                {
                    Directory.Delete(tempFolderPath, true);
                }
                catch (Exception e)
                {
                    Log.Warn(e);
                }
                Log.Info("Dump command is complete.");
            }
        }