/// <summary>
/// Checks whether an image is already recorded for the given source.
/// </summary>
/// <param name="imageId">Image identifier, bound to the <c>ImageId</c> parameter of the search query.</param>
/// <param name="source">Source the image belongs to, bound to the <c>Source</c> parameter of the search query.</param>
/// <param name="cancellationToken">Token that cancels the database query.</param>
/// <returns>
/// <c>true</c> when the first value returned by the search query is greater than zero;
/// otherwise <c>false</c>.
/// </returns>
public async Task<bool> ExistsAsync(string imageId, Enums.Sources source, CancellationToken cancellationToken)
{
    using (var dbConnection = Connection())
    {
        dbConnection.Open();

        // The token has to travel inside a CommandDefinition; the plain
        // (sql, param) overload has no way to receive it, so it was silently ignored before.
        var command = new CommandDefinition(
            _searchQuery,
            new { ImageId = imageId, Source = source },
            cancellationToken: cancellationToken);

        var matchCount = await dbConnection.QueryFirstAsync<decimal>(command);
        return matchCount > 0;
    }
}
/// <summary>
/// Returns the top search results for a single source.
/// </summary>
/// <param name="id">Source to build the "Source" top request for.</param>
/// <returns>
/// The repository's results for the request. When <c>_settings.Cache.Database</c> is enabled the
/// results are served from the memory cache under the key <c>top:source:{id}</c> and expire
/// 60 minutes after being created; otherwise the repository is queried directly.
/// </returns>
public async Task<List<SearchResult>> Source(Enums.Sources id)
{
    var request = new TopRequest { By = "Source", Source = id };

    if (_settings.Cache.Database)
    {
        var cacheKey = $"top:source:{id}";

        return await _memoryCache.GetOrCreateAsync(cacheKey, async entry =>
        {
            entry.SetAbsoluteExpiration(TimeSpan.FromMinutes(60));
            return await _repo.TopImageSearchAsync(request, CancellationToken.None);
        });
    }

    // Caching disabled: always go to the repository.
    return await _repo.TopImageSearchAsync(request, CancellationToken.None);
}
/// <summary>
/// Builds an <see cref="ImageDetail"/> for a scraped element by running the processor's
/// extraction steps (URL, who-stamp, resolution, tags) against it.
/// </summary>
/// <param name="processor">Processor whose <c>Get*</c> methods fill in the image detail.</param>
/// <param name="element">Element the image information is read from.</param>
/// <param name="source">Value stored as the image's <c>IndexSource</c>.</param>
/// <param name="classification">Stored as the image's <c>Class</c> unless it is <c>Any</c>.</param>
/// <returns>
/// The populated image detail, or <c>null</c> when <c>GetImageUrl</c> returns <c>false</c>.
/// </returns>
public static ImageDetail ProcessImageElement(this IScrapeProcessor processor, IElement element, Enums.Sources source, Enums.Classes classification = Enums.Classes.Any)
{
    var detail = new ImageDetail { IndexSource = source };

    if (classification != Enums.Classes.Any)
    {
        detail.Class = classification;
    }

    var urlFound = processor.GetImageUrl(element, detail);
    if (!urlFound)
    {
        return null;
    }

    processor.GetWhoStamp(element, detail);
    processor.GetResolution(element, detail);
    processor.GetTags(element, detail);

    try
    {
        // The id is the text before the first '.' of the last URL path segment,
        // the extension is the text after its last '.'; both fall back to "".
        var fileName = detail.ImageUrl?.Split('/').Last();
        var nameParts = fileName?.Split('.');

        detail.ImageId = nameParts?.First() ?? "";
        detail.ImageExtension = nameParts?.Last() ?? "";
    }
    catch
    {
        // Best effort: a failure here leaves the id/extension as they were.
    }

    return detail;
}
/// <summary>
/// Scrapes up to <c>Settings.MaxPages</c> pages of a board and queues every image that is not
/// already present in the scrape repository.
/// </summary>
/// <param name="source">
/// Board to scrape; used to look up the board URL and the thread-URL cleanup text, and stored
/// in <c>CurrentBoard</c> before scraping starts.
/// </param>
/// <param name="classification">Classification forwarded to <c>ProcessImageElement</c> for every image.</param>
/// <remarks>
/// Any exception is written to the console and ends the run; nothing is rethrown.
/// </remarks>
public void Process(Enums.Sources source, Enums.Classes classification = Enums.Classes.Any)
{
    CurrentBoard = source;

    try
    {
        // Get the board URL.
        var board = Core.Constants.SourceUrls[source];
        Helpers.LogMessage($"Starting Board: {board}");

        for (var page = 1; page <= Settings.MaxPages; page++)
        {
            // The first page is the bare board URL; later pages append the page number.
            var pageSuffix = page == 1 ? "" : $"{page}";
            var pageContents = Helpers.GetPageContents(board + pageSuffix);
            var threads = GetPageThreads(pageContents);

            foreach (var thread in threads)
            {
                // Strip the per-source cleanup text (when one is configured) to get the thread URL.
                var cleanup = Constants.ThreadUrlCleanup[source];
                var threadUrl = !string.IsNullOrWhiteSpace(cleanup)
                    ? thread.Replace(cleanup, "")
                    : thread;

                var threadContents = Helpers.GetPageContents(board + threadUrl);
                var post = GetPagePost(threadContents);

                // Forward the caller's classification; it was previously dropped,
                // so every image fell back to the default.
                var image = this.ProcessImageElement(post, source, classification);
                if (string.IsNullOrWhiteSpace(image?.ImageId))
                {
                    continue;
                }

                // Skip images that have been scraped before.
                var alreadyScraped = ScrapeRepositories.ImageScrapeRepository
                    .ExistsAsync(image.ImageId, image.IndexSource, CancellationToken.None)
                    .GetAwaiter()
                    .GetResult();
                if (alreadyScraped)
                {
                    continue;
                }

                Helpers.LogMessage($"Image not in Scrape Repo {image.ImageId}");
                ScrapeRepositories.Queue.Enqueue(image);
            }
        }
    }
    catch (Exception ex)
    {
        Console.WriteLine(ex);
    }
}
/// <summary>
/// Scrapes the "top", "new" and default tabs of a board, queuing direct image links that are
/// not already in the scrape repository and handing Imgur gallery links to
/// <c>ImgurHelper.ProcessImgurGallery</c>.
/// </summary>
/// <param name="source">Board to scrape; used to look up the board URL and stored as each image's <c>IndexSource</c>.</param>
/// <param name="classification">
/// Stored as each image's <c>Class</c> unless it is <c>Any</c>, and forwarded to the gallery helper.
/// </param>
/// <remarks>
/// Any exception is written to the console and ends the run; nothing is rethrown.
/// </remarks>
public void Process(Enums.Sources source, Enums.Classes classification = Enums.Classes.Any)
{
    try
    {
        // Get the board URL.
        var board = Core.Constants.SourceUrls[source];
        Helpers.LogMessage($"Starting Board: {board}");

        // An imgur.com link whose file extension is one of these is treated as a direct image.
        var directImageExtensions = new[] { "png", "jpeg", "jpg", "gif" };
        var tabs = new List<string> { "top", "new", "" };

        foreach (var tab in tabs)
        {
            var boardurl = $"{board}{tab}";
            var lastPost = "";

            for (var x = 1; x <= Settings.MaxPages; x++)
            {
                // Built fresh for every page so the first page of a tab never inherits the
                // previous tab's paging query. The count advances by 25 per page.
                var url = x == 1 ? "" : $"?count={(x - 1) * 25}&after={lastPost}";

                Helpers.LogMessage($"Getting Page Contents for {boardurl}{url}");
                var pageContents = Helpers.GetPageContents(boardurl + url);

                var threads = GetPageThreads(pageContents);
                if (!threads.Any())
                {
                    break;
                }

                foreach (var thread in threads)
                {
                    var imagelink = thread.GetAttribute("data-domain");
                    var dataUrl = thread.GetAttribute("data-url");

                    // An imgur.com link that ends in an image extension is a direct link.
                    if (imagelink == "imgur.com" && dataUrl != null)
                    {
                        var ext = dataUrl.Split("/").Last().Split(".").Last();
                        if (directImageExtensions.Contains(ext))
                        {
                            imagelink = "i.imgur.com";
                        }
                    }

                    switch (imagelink)
                    {
                        case "i.redd.it":   // direct link
                        case "i.imgur.com": // direct link
                        {
                            var image = new ImageDetail { IndexSource = source };
                            if (classification != Enums.Classes.Any)
                            {
                                image.Class = classification;
                            }

                            if (!GetImageUrl(thread, image))
                            {
                                continue;
                            }

                            GetWhoStamp(thread, image);
                            GetResolution(thread, image);
                            GetTags(thread, image);

                            try
                            {
                                image.ImageId = thread.GetAttribute("data-fullname");
                                image.ImageExtension = image.ImageUrl?.Split('/')?.Last()?.Split('.')?.Last() ?? "";
                            }
                            catch
                            {
                                // Best effort: a failure here leaves the id/extension as they were.
                            }

                            // Skip images that have been scraped before.
                            // NOTE(review): i.imgur.com links are looked up under Sources.Imgur
                            // but queued with IndexSource = source — confirm this is intended.
                            var lookupSource = imagelink == "i.imgur.com" ? Enums.Sources.Imgur : source;
                            var alreadyScraped = ScrapeRepositories.ImageScrapeRepository
                                .ExistsAsync(image.ImageId, lookupSource, CancellationToken.None)
                                .GetAwaiter()
                                .GetResult();
                            if (alreadyScraped)
                            {
                                continue;
                            }

                            ScrapeRepositories.Queue.Enqueue(image);
                            break;
                        }

                        case "imgur.com":
                            // Not a direct link: treat it as a gallery and let the helper walk it.
                            ImgurHelper.ProcessImgurGallery(dataUrl ?? "", source, classification);
                            break;
                    }
                }

                // threads is known to be non-empty here; its last entry anchors the next page.
                lastPost = threads.Last().GetAttribute("data-fullname");
            }
        }
    }
    catch (Exception ex)
    {
        Console.WriteLine(ex);
    }
}
/// <summary>
/// Downloads an Imgur page, extracts the JSON embedded in its <c>window.runSlots</c> script and
/// queues every image it describes that is not already in the scrape repository.
/// </summary>
/// <param name="url">Page to process; a null, empty or whitespace value makes the method return immediately.</param>
/// <param name="source">Not used by this method; queued images are always indexed under <c>Enums.Sources.Imgur</c>.</param>
/// <param name="classification">Stored as each image's <c>Class</c> unless it is <c>Any</c>.</param>
/// <remarks>
/// Failures are logged through <c>Helpers.LogMessage</c> and never rethrown, so one bad page
/// does not abort the caller's scrape.
/// </remarks>
public static void ProcessImgurGallery(string url, Enums.Sources source, Enums.Classes classification = Enums.Classes.Any)
{
    if (string.IsNullOrWhiteSpace(url))
    {
        return;
    }

    var page = Helpers.GetPageContents(url);
    var parser = new HtmlParser();
    var document = parser.Parse(page);

    var jsElements = document.GetElementsByTagName("script");
    if (!jsElements.Any())
    {
        Helpers.LogMessage($"Cant find Script Sections for this page {url}");
        return;
    }

    // FirstOrDefault instead of First: a page without the script is logged, not thrown.
    var slotsScript = jsElements.FirstOrDefault(element => element.InnerHtml.Contains("window.runSlots"));
    if (slotsScript == null)
    {
        Helpers.LogMessage($"Cant find window.runSlots Script Section for this page {url}");
        return;
    }

    // The payload is taken as the text from the first '{' up to the next ';'.
    // NOTE(review): a ';' inside a JSON string value would cut the payload short; the
    // resulting parse error is caught and logged below.
    var imagesElement = slotsScript.TextContent;
    var startPosi = imagesElement.IndexOf("{", StringComparison.Ordinal);
    var firstSemi = startPosi < 0
        ? -1
        : imagesElement.IndexOf(";", startPosi, StringComparison.Ordinal);
    if (firstSemi < 0)
    {
        Helpers.LogMessage($"Cant find JSON payload for this page {url}");
        return;
    }

    var json = imagesElement.Substring(startPosi, firstSemi - startPosi);

    try
    {
        var images = JsonHelpers.LoadJson<ImgurJson>(json);

        // The page title is the same for every image, so look it up once.
        var postTitleElement = document.GetElementsByClassName("post-title").FirstOrDefault();
        if (postTitleElement == null)
        {
            Helpers.LogMessage($"Cant find post title for this page {url}");
            return;
        }

        var postTitle = postTitleElement.TextContent;
        var author = images.Item.author ?? "Anonymous";

        // Null-safe so that a payload without album data falls through to the single-image
        // branch instead of throwing. Presumably that is how single picture pages look —
        // TODO confirm against a real payload.
        var albumImages = images.Item.album_images?.images;

        if (albumImages != null && albumImages.Any())
        {
            foreach (var image in albumImages.Where(entry => entry != null))
            {
                QueueImgurImage(image.hash, image.title, image.description, image.ext,
                    $"{image.width}x{image.height}", postTitle, author, classification);
            }
        }
        else
        {
            // Singular image: the item itself carries the image fields.
            var image = images.Item;
            QueueImgurImage(image.hash, image.title, image.description, image.ext,
                $"{image.width}x{image.height}", postTitle, author, classification);
        }
    }
    catch (Exception e)
    {
        Helpers.LogMessage($"Error on Imgur JSON Parse : {e.Message}");
    }
}

/// <summary>
/// Builds the <see cref="ImageDetail"/> for one Imgur image and queues it unless the scrape
/// repository already knows it. Any failure is logged and the image is skipped.
/// </summary>
private static void QueueImgurImage(string hash, string title, string description, string ext, string resolution, string postTitle, string author, Enums.Classes classification)
{
    if (string.IsNullOrWhiteSpace(hash))
    {
        // Without a hash there is no id to de-duplicate on and no usable image URL.
        Helpers.LogMessage("Skipping Imgur image without a hash");
        return;
    }

    try
    {
        var safeTitle = title ?? "";
        var safeDescription = description ?? "";
        var extension = ext ?? "";

        var imageDetail = new ImageDetail
        {
            ImageUrl = $"http://i.imgur.com/{hash}{ext}",
            ImageId = hash,
            // Drops the first character of the extension (".jpg" becomes "jpg"). An empty
            // extension throws here, which is caught below and skips the image.
            ImageExtension = extension.Remove(0, 1),
            Resolution = resolution,
            Tag = postTitle
                  + (!string.IsNullOrWhiteSpace(safeDescription) ? $",{safeDescription}" : "")
                  + (!string.IsNullOrWhiteSpace(safeTitle) ? $",{safeTitle}" : ""),
            Who = author,
            IndexSource = Enums.Sources.Imgur
        };

        if (classification != Enums.Classes.Any)
        {
            imageDetail.Class = classification;
        }

        // Skip images that have been scraped before.
        var alreadyScraped = Helpers.ScrapeRepositories.ImageScrapeRepository
            .ExistsAsync(imageDetail.ImageId, imageDetail.IndexSource, CancellationToken.None)
            .GetAwaiter()
            .GetResult();
        if (alreadyScraped)
        {
            return;
        }

        Helpers.ScrapeRepositories.Queue.Enqueue(imageDetail);
    }
    catch (Exception e)
    {
        Helpers.LogMessage($"Error on Imgur For Loop : {e.Message}");
    }
}