/// <summary>
/// Downloads every image of an Imgur album/gallery link through <c>albumEndpoint</c>
/// and saves each one via <see cref="SaveImage"/>.
/// </summary>
/// <param name="url">Album URL; the third '/'-separated segment of its path is used as the album ID.</param>
/// <param name="fileName">Base file name (no directory, no extension) shared by all images of the album.</param>
/// <param name="p">Post the album belongs to; supplies the timestamp prefix, the NSFW flag and log context.</param>
/// <param name="curDir">Directory the images are saved under (an "/NSFW" sub-folder is appended for NSFW posts).</param>
/// <remarks>
/// Any exception is logged and swallowed so that one broken album does not abort the caller.
/// </remarks>
void DownloadImgurAlbum(Uri url, string fileName, Post p, string curDir)
{
    try
    {
        // For "/a/{id}" or "/gallery/{id}" the split yields ["", "a", "{id}"].
        string imgurID = url.AbsolutePath.Split('/')[2];
        if (imgurID.Length < 3)
        {
            return;
        }

        // The method is synchronous, so the album request is waited on here.
        var task = albumEndpoint.GetAlbumImagesAsync(imgurID);
        Task.WaitAll(task);
        var album = task.Result;

        string targetDir = curDir + ((p.NSFW) ? "/NSFW" : "");
        int i = 0;
        foreach (var image in album)
        {
            if (!isRunning)
            {
                break;
            }

            // 1-based position of the image inside the album; part of the file name.
            i++;
            string thisFile = targetDir + "/" + "[" + p.CreatedUTC.ToEpoch() + "] [" + i + "] " + fileName;

            // SaveImage names the file after the extension found in the URL path, so the
            // "already downloaded" check has to look for exactly that extension. Probing
            // for ".png" regardless of the real type never matched e.g. ".jpg" files and
            // caused them to be downloaded again on every crawl.
            Uri imageUrl = new Uri(image.Link);
            string ext = Path.GetExtension(imageUrl.AbsolutePath);
            if (File.Exists(thisFile + ext))
            {
                continue;
            }

            SaveImage(p, thisFile, imageUrl);
        }
    }
    catch (Exception e)
    {
        LOGGER.LogWarning($"Unable to Download {p.Title}, {e.Message}", $"/r/{p.SubredditName}", verbose);
        LOGGER.LogError(e.StackTrace, name, verbose);
    }
}
/// <summary>
/// Downloads an image via url, writes it to disk and records it in <c>RinDB</c>.
/// </summary>
/// <param name="p">Post the image belongs to; used for log context and the database record.</param>
/// <param name="file">Destination path without extension; the extension of <paramref name="url"/>'s path is appended.</param>
/// <param name="url">Direct link to the image to download.</param>
/// <remarks>
/// Failures are logged and swallowed: download/open errors as a warning ("Unable to Download"),
/// write/database errors as an error ("Failed to save"). <c>imgCount</c> is only incremented
/// after the database record has been added.
/// </remarks>
void SaveImage(Post p, string file, Uri url)
{
    try
    {
        data = _client.DownloadData(url);
        string ext = Path.GetExtension(url.AbsolutePath);
        string targetPath = $"{file}{ext}";

        // The target directory must exist BEFORE the stream is opened: FileMode.Create
        // throws DirectoryNotFoundException for a missing directory, so creating it
        // after constructing the FileStream (as was done previously) could never help.
        DirectoryInfo parent = Directory.GetParent(file);
        if (parent != null && !parent.Exists)
        {
            parent.Create();
        }

        using (FileStream image = new FileStream(targetPath, FileMode.Create, FileAccess.Write))
        {
            try
            {
                image.Write(data, 0, data.Length);
                LOGGER.Log($"Saved {targetPath}", $"/r/{p.SubredditName}", verbose);
                image.Flush();

                // Drops the leading "[epoch]" prefix (epoch digits plus the two brackets)
                // from the file name before storing it as the image name.
                int prefixLength = p.CreatedUTC.ToEpoch().ToString().Length + 2;
                string displayName = Path.GetFileNameWithoutExtension(file).Remove(0, prefixLength);

                // Path stored in the database is relative to baseDir.
                string relativePath = file.Replace($"{baseDir}/", "") + ext;

                RinDB.AddImage(new ImageModel()
                {
                    name = Uri.EscapeDataString(displayName),
                    fileUri = Uri.EscapeDataString(relativePath),
                    timeadded = p.CreatedUTC.ToEpoch(),
                    isnsfw = p.NSFW,
                    tags = _curTags?.ToList()
                });
                imgCount++;
            }
            catch (Exception e)
            {
                LOGGER.LogError($"Failed to save \"{p.Title}\", {e.Message}", $"/r/{p.SubredditName}", verbose);
                LOGGER.LogError(e.StackTrace, name, verbose);
            }
        }
    }
    catch (Exception e)
    {
        LOGGER.LogWarning($"Unable to Download {p.Title}, {e.Message}", $"/r/{p.SubredditName}", verbose);
        LOGGER.LogError(e.StackTrace, name, verbose);
    }
}
/// <summary>
/// Start find and download the images: registers the "RedditCrawler" timer whose callback
/// (re)builds the subreddit list when needed, fetches the configured listing of every
/// subreddit and saves each linked image or Imgur album.
/// </summary>
/// <remarks>
/// The callback is scheduled with a due time of 0 and a period of <c>updateRate</c>.
/// Both the listing fetch and the post enumeration are retried at most 10 times per
/// subreddit; a subreddit that cannot be fetched is skipped for this pass.
/// </remarks>
void Crawl()
{
    // Upper bound for both retry loops below.
    const int MaxRetries = 10;

    if (_reddit == null || _imgurClient == null)
    {
        Setup();
    }

    if (getFrom != null)
    {
        LOGGER.Log($"Starting crawl of {getFrom}", name, verbose);
    }
    else
    {
        LOGGER.Log($"Starting Crawl of {subreddits.Count} subreddits", name, verbose);
    }

    // State captured by the timer callback; it persists between ticks.
    string curDir = "";
    imgCount = 0;
    string file = "";
    Listing<Post> posts = default(Listing<Post>);
    List<Subreddit> subs = new List<Subreddit>();
    bool postGet = false;

    _crawlLoop = StartTimer("RedditCrawler", info =>
    {
        try
        {
            imgCount = 0;

            if (needsReBuild)
            {
                subs.Clear();
                LOGGER.Log("Rebuilding subreddit list", name, verbose);
                if (getFrom != null)
                {
                    // Single-subreddit mode.
                    try
                    {
                        subs.Add(_reddit.GetSubreddit(getFrom));
                        LOGGER.Log($"Connected to {getFrom}", name, verbose);
                    }
                    catch (Exception e)
                    {
                        LOGGER.LogWarning($"Failed to connect to subreddit: {getFrom}, {e.Message}", name, verbose);
                        _crawlLoop.Dispose();
                    }
                }
                else
                {
                    if (subreddits.Count == 0)
                    {
                        // Nothing to crawl; leave the callback immediately.
                        isRunning = false;
                        return;
                    }

                    foreach (string s in subreddits)
                    {
                        if (!isRunning)
                        {
                            break;
                        }
                        try
                        {
                            subs.Add(_reddit.GetSubreddit(s));
                            LOGGER.Log($"Connected to {s}", name, verbose);
                        }
                        catch (Exception e)
                        {
                            // A subreddit that cannot be resolved is skipped, the rest is still crawled.
                            LOGGER.LogWarning($"Failed to connect to subreddit: {s}, {e.Message}", name, verbose);
                            LOGGER.LogWarning(e.StackTrace, name, verbose);
                            continue;
                        }
                    }
                }
                needsReBuild = false;
                LOGGER.Log("Finished Building, starting crawl", name, verbose);
            }

            if (!isRunning)
            {
                _crawlLoop.Dispose();
            }

            foreach (Subreddit sub in subs)
            {
                if (!isRunning)
                {
                    break;
                }

                SelectTags(sub.Name);

                //Change the current directory and make sure it exists
                curDir = baseDir + "/" + sub.Name;
                if (!Directory.Exists(curDir))
                {
                    Directory.CreateDirectory(curDir);
                }

                // --- Fetch the listing for the configured search mode (bounded retry) ---
                bool subCollected = false;
                int retryCount = 0;
                while (!subCollected)
                {
                    if (!isRunning || retryCount >= MaxRetries)
                    {
                        if (retryCount >= MaxRetries)
                        {
                            Write("Retry timeout");
                        }
                        break;
                    }
                    try
                    {
                        //Get posts
                        switch (searchMode)
                        {
                            case SearchMode.Hot:
                                posts = sub.Hot;
                                break;
                            case SearchMode.New:
                                posts = sub.New;
                                break;
                            case SearchMode.Top:
                                posts = sub.GetTop(FromTime.All);
                                break;
                            default:
                                posts = sub.New;
                                break;
                        }
                        subCollected = true;
                    }
                    catch (Exception e)
                    {
                        LOGGER.LogWarning($"Failed to connect to reddit: {e.Message}, retrying...", name, verbose);
                        retryCount++;
                    }
                }

                if (!subCollected)
                {
                    // Without a fresh listing, "posts" is either null or still the previous
                    // subreddit's listing; enumerating it would fail forever or store foreign
                    // posts in this subreddit's directory. Skip this subreddit instead.
                    continue;
                }

                // --- Walk the posts and download what they link to (bounded retry) ---
                postGet = false;
                int postRetryCount = 0;
                while (!postGet)
                {
                    if (!isRunning || postRetryCount >= MaxRetries)
                    {
                        if (postRetryCount >= MaxRetries)
                        {
                            Write("Retry timeout");
                        }
                        break;
                    }
                    try
                    {
                        foreach (Post p in posts.Take(postsToGet))
                        {
                            if (!isRunning)
                            {
                                break;
                            }

                            string ext = Path.GetExtension(p.Url.AbsolutePath);
                            string nsfwDir = curDir + "/NSFW";
                            if (p.NSFW && !Directory.Exists(nsfwDir))
                            {
                                Directory.CreateDirectory(nsfwDir);
                            }

                            //Create file name
                            file = FormatFileName(p.Title);

                            if (allowedFiles.Contains(ext)) //Direct link to image file
                            {
                                file = "[" + p.CreatedUTC.ToEpoch() + "] " + file;
                                file = curDir + ((p.NSFW) ? "/NSFW" : "") + "/" + file;
                                if (File.Exists($"{file}{ext}"))
                                {
                                    // Already downloaded on an earlier pass.
                                    continue;
                                }
                                SaveImage(p, file, p.Url);
                            }
                            else if (p.Url.DnsSafeHost == "imgur.com") //Imgur in-direct link/album
                            {
                                string imgurID = Path.GetFileNameWithoutExtension(p.Url.AbsolutePath);
                                if (p.Url.AbsolutePath.Contains("/a/") || p.Url.AbsolutePath.Contains("/gallery/")) //Save Imgur Album
                                {
                                    DownloadImgurAlbum(p.Url, file, p, curDir);
                                }
                                else if (imgurID != "new") //Save Imgur in-direct link
                                {
                                    file = "[" + p.CreatedUTC.ToEpoch() + "] " + file;
                                    file = curDir + ((p.NSFW) ? "/NSFW" : "") + "/" + file;
                                    try
                                    {
                                        // The page link is turned into a direct i.imgur.com link with a ".png" extension.
                                        string link = $"http://i.imgur.com/{imgurID}.png";
                                        ext = Path.GetExtension(link);
                                        if (File.Exists($"{file}{ext}"))
                                        {
                                            continue;
                                        }
                                        SaveImage(p, file, new Uri(link));
                                    }
                                    catch (Exception e)
                                    {
                                        LOGGER.LogWarning($"Unable to Download {p.Title}, {e.Message}", $"/r/{p.SubredditName}", verbose);
                                        LOGGER.LogError(e.StackTrace, name, verbose);
                                    }
                                }
                            }
                            file = "";
                        }

                        // Reaching this point means the enumeration finished, even if it
                        // yielded no posts at all. Marking completion here (instead of on
                        // the first yielded post) keeps an empty listing from looping forever.
                        postGet = true;
                    }
                    catch (Exception e)
                    {
                        LOGGER.LogWarning($"Failed to get posts: {e.Message}, retrying...", name, verbose);
                        // Count the failure so a persistent error cannot spin this loop endlessly.
                        postRetryCount++;
                    }
                }
            }
        }
        catch (Exception e)
        {
            LOGGER.LogError($"Crawl failed... {e.Message}, shutting down", name, verbose);
            LOGGER.LogError(e.StackTrace, name, verbose);
            Write("Crawl Ended");
            isRunning = false;
        }

        if (!isRunning && _crawlLoop != null)
        {
            StopTimer("RedditCrawler");
        }

        if (!loop)
        {
            // Single-pass mode: report and stop after the first completed pass.
            LOGGER.Log($"Finished Dowloading {imgCount} images... shutting down", name, verbose);
            isRunning = false;
            loop = true;
        }
        else
        {
            LOGGER.Log($"Dowloaded {imgCount} images...", name, verbose);
            LOGGER.Log($"Sleeping for {updateRate}ms", name, verbose);
        }
    }, 0, updateRate);
}