/// <summary>
/// Builds the search URL from the stored (or supplied) URL template, substituting
/// the scrape function, the search criteria and the max-position token.
/// </summary>
/// <param name="function">The scrape function placed into template slot {0}.</param>
/// <param name="MaxPosition">Pagination token placed into template slot {2}.</param>
/// <param name="ForceCriteriaRefresh">When true, rebuilds <c>Criteria</c> from <c>Query</c> even if already set.</param>
/// <param name="URL">Optional template override; empty means use the stored template.</param>
/// <param name="ReplaceURL">When true (and <paramref name="URL"/> is non-empty), persists the override to the stored URL.</param>
/// <returns>The formatted URL with spaces percent-encoded.</returns>
public String BuildSearchURL(ScrapeType function = ScrapeType.tweets, string MaxPosition = "", bool ForceCriteriaRefresh = false, String URL = "", bool ReplaceURL = false)
{
    // Prefer an explicitly supplied template over the stored one.
    string template = string.IsNullOrEmpty(URL) ? this.URL : URL;
    if (!string.IsNullOrEmpty(URL) && ReplaceURL)
    {
        this.URL = URL; // caller asked us to persist the override
    }

    this.function = function;

    // Rebuild the criteria string when forced, or when it has never been built.
    if (ForceCriteriaRefresh || string.IsNullOrEmpty(this.Criteria))
    {
        this.Criteria = Query.ToString();
    }

    // Template slots: {0} = function, {1} = criteria, {2} = max position.
    ModifiedURL = string.Format(template, function.ToString(), this.Criteria, MaxPosition);

    // Round-trip through Uri, then percent-encode any remaining spaces.
    Uri parsed = new Uri(ModifiedURL, UriKind.RelativeOrAbsolute);
    ModifiedURL = parsed.ToString().Replace(" ", "%20");
    return ModifiedURL;
}
/// <summary>
/// Launches instagram-scraper for <paramref name="username"/> inside an interactive CMD window.
/// </summary>
/// <param name="username">The Instagram account to scrape.</param>
/// <param name="scrapeType">Media types to download; <c>ScrapeType.All</c> omits the filter.</param>
/// <param name="destination">Directory the scraper writes into.</param>
/// <param name="loginUsername">Optional login user name.</param>
/// <param name="loginPassword">Optional login password.</param>
public static void RunScraper(
    string username,
    ScrapeType scrapeType,
    string destination,
    string loginUsername = null,
    string loginPassword = null)
{
    string destinationArgument = $"--destination \"{destination}\"";

    string mediaTypeArgument = (scrapeType == ScrapeType.All)
        ? ""
        : $"--media-types {ScrapeTypeValues[scrapeType]}";

    // NOTE(review): credentials are interpolated unquoted into a shell command line;
    // values containing spaces or cmd metacharacters will break the command or allow
    // injection — consider quoting/escaping these arguments.
    string loginDetailsArguments =
        (string.IsNullOrWhiteSpace(loginUsername) && string.IsNullOrWhiteSpace(loginPassword))
            ? ""
            : $"--login-user {loginUsername} --login-pass {loginPassword}";
    //--cookiejar \"{COOKIES_FILEPATH}\"

    string finalArguments =
        $" /K instagram-scraper {username} {loginDetailsArguments} {mediaTypeArgument} " +
        $"{destinationArgument} --interactive --retain-username " +
        "--template \"{username} {mediatype} {day}d{month}m{year}y {h};{m};{s}\" " +
        $"--latest-stamps \"{TIMESTAMPS_FILEPATH}\"";

    Console.WriteLine(finalArguments);

    ProcessStartInfo startInfo = new ProcessStartInfo
    {
        FileName = "CMD.EXE",
        Arguments = finalArguments,
        UseShellExecute = true,
        // NOTE(review): CreateNoWindow has no effect while UseShellExecute is true.
        CreateNoWindow = true //,
        //WindowStyle = ProcessWindowStyle.Hidden
    };

    Process process = new Process { StartInfo = startInfo };
    // FIX: Process.Exited is only raised when EnableRaisingEvents is set;
    // without it the Process_Exited handler attached below never fires.
    process.EnableRaisingEvents = true;
    process.Exited += Process_Exited;
    process.Start();
}
/// <summary>
/// Performs a single HTTP scrape of <paramref name="uri"/> using the given verb.
/// </summary>
/// <param name="type">GET or POST; any other value throws <see cref="NotImplementedException"/>.</param>
/// <param name="uri">The URI to request.</param>
/// <param name="query">Query/form parameters for the request.</param>
/// <returns>The post-processed scraped page.</returns>
public ScrapedPage Scrape(ScrapeType type, string uri, NameValueCollection query)
{
    string qs = BuildQueryString(query);
    ScrapedPage page = new ScrapedPage
    {
        QueryParameters = query,
        ScrapeType = type
    };

    switch (type)
    {
        case ScrapeType.GET:
            // Append the query string with whichever separator the URI still needs.
            string separator = uri.Contains("?") ? "&" : "?";
            uri = uri + separator + qs;
            page.RawStream = HttpGet(uri);
            break;
        case ScrapeType.POST:
            page.RawStream = HttpPost(uri, qs);
            break;
        default:
            throw new NotImplementedException();
    }

    if (page.RawStream == null)
    {
        throw new Exception("No data for " + uri);
    }

    page.Url = new Uri(uri);
    Referer = uri; // remember this URI as the referer for the next request
    return PostProcessData(page);
}
/// <summary>
/// Initializes a new instance of the <see cref="ImageScrapeActionContext"/> class.
/// </summary>
/// <param name="imageScrapeType">Type of the image scrape.</param>
/// <param name="scrapeType">Type of the scrape.</param>
/// <param name="askIfMultipleResults">if set to <c>true</c>, prompt the user when multiple results are found.</param>
public ImageScrapeActionContext(
    ImageScrapeType imageScrapeType,
    ScrapeType scrapeType,
    bool askIfMultipleResults)
    : base(scrapeType, askIfMultipleResults)
{
    // Only the image-specific type is stored here; the rest is handled by the base context.
    this.imageScrapeType = imageScrapeType;
}
/// <summary>
/// Initializes a new instance of the <see cref="MovieImageScraperActionContext"/> class.
/// </summary>
/// <param name="dbMovie">The movie.</param>
/// <param name="imageScrapeType">The type of image scrape to perform.</param>
/// <param name="scrapeType">The type of scrape to perform.</param>
/// <param name="askIfMultipleResults">if set to <c>true</c> ask the user to select a movie if multiple results are found.</param>
public MovieImageScraperActionContext(
    Structures.DBMovie dbMovie,
    ImageScrapeType imageScrapeType,
    ScrapeType scrapeType,
    bool askIfMultipleResults)
    : base(imageScrapeType, scrapeType, askIfMultipleResults)
{
    // Only the movie is stored here; image/scrape settings are handled by the base context.
    this.dbMovie = dbMovie;
}
/// <summary>
/// Initializes a new instance of the <see cref="MovieInfoScraperActionContext"/> class.
/// </summary>
/// <param name="dbMovie">The movie.</param>
/// <param name="scrapeType">The type of scrape to perform.</param>
/// <param name="askIfMultipleResults">if set to <c>true</c> ask the user to select a movie if multiple results are found.</param>
/// <param name="options">The global scraper options.</param>
public MovieInfoScraperActionContext(
    Structures.DBMovie dbMovie,
    ScrapeType scrapeType,
    bool askIfMultipleResults,
    Structures.ScrapeOptions options)
    : base(scrapeType, askIfMultipleResults)
{
    // Movie and options are stored here; scrape settings are handled by the base context.
    this.dbMovie = dbMovie;
    this.options = options;
}
/// <summary>
/// One-shot scrape of <paramref name="uri"/>, optionally authenticating with
/// the supplied network credentials.
/// </summary>
/// <param name="type">The scrape verb (GET/POST).</param>
/// <param name="uri">The URI to request.</param>
/// <param name="userName">User name; when null/empty, no credentials are attached.</param>
/// <param name="password">Password to pair with <paramref name="userName"/>.</param>
/// <param name="query">Query/form parameters for the request.</param>
/// <returns>The scraped page.</returns>
public static ScrapedPage SimpleScrape(ScrapeType type, string uri, string userName, string password, NameValueCollection query)
{
    var scraper = new Scraper();
    bool hasCredentials = !string.IsNullOrEmpty(userName);
    if (hasCredentials)
    {
        scraper.UseCredentials = true;
        scraper.SetNetworkCredentials(userName, password);
    }
    return scraper.Scrape(type, uri, query);
}
/// <summary>
/// Determines whether <paramref name="uri"/> demands HTTP credentials by probing
/// it once with credentials disabled and checking for a 401 response.
/// </summary>
/// <param name="type">The scrape verb (GET/POST).</param>
/// <param name="uri">The URI to probe.</param>
/// <param name="keyAndValuePairs">Alternating key/value strings for the query.</param>
/// <returns><c>true</c> if the probe failed with 401 Unauthorized; otherwise <c>false</c>.</returns>
public bool RequiresCredentials(ScrapeType type, string uri, params string[] keyAndValuePairs)
{
    bool requiresAuth = false;
    try
    {
        UseCredentials = false;
        Scrape(type, uri, keyAndValuePairs);
    }
    catch (System.Net.WebException ex)
    {
        // Only a 401 means "credentials required"; other web failures count as "no".
        HttpWebResponse response = ex.Response as HttpWebResponse;
        if (response != null && response.StatusCode == HttpStatusCode.Unauthorized)
        {
            requiresAuth = true;
        }
    }
    finally
    {
        // NOTE(review): this unconditionally re-enables credentials rather than
        // restoring the previous value — confirm callers expect that.
        UseCredentials = true;
    }
    return requiresAuth;
}
/// <summary>
/// Resets state and raises <see cref="ScrapeEnded"/> after a scrape has finished.
/// </summary>
/// <param name="type">The type of scrape that just completed.</param>
private void ScrapeCleanup(ScrapeType type)
{
    _stopwatch.Stop();
    _scrapeReport.ScrapeEnded = DateTime.Now;
    _scrapeReport.TimeTaken = _stopwatch.Elapsed;
    // FIX: null-conditional invoke closes the race between the null check and the
    // call when the last subscriber detaches concurrently.
    ScrapeEnded?.Invoke(this, new ScrapeEndedEventArgs(type));
    _scrapeInProgress = false;
}
/// <summary>
/// Convenience overload: scrapes <paramref name="uri"/> without credentials.
/// </summary>
/// <param name="type">The scrape verb (GET/POST).</param>
/// <param name="uri">The URI to request.</param>
/// <param name="query">Query/form parameters for the request.</param>
/// <returns>The scraped page.</returns>
public static ScrapedPage SimpleScrape(ScrapeType type, string uri, NameValueCollection query)
    => SimpleScrape(type, uri, null, null, query);
/// <summary>
/// Scrape multiple games that have been selected in the games library (either scrape unscraped, or re-scrape all).
/// </summary>
/// <param name="list">Games to process; when null the list is rebuilt from favorites or from <paramref name="systemId"/>.</param>
/// <param name="scrapeType">Scrape mode: selected/favorites/system, and their rescrape variants.</param>
/// <param name="systemId">System whose games are used when <paramref name="list"/> is null and the mode is not favorites.</param>
// NOTE(review): async void — exceptions escaping this method are unobservable to
// callers; confirm it is only ever invoked fire-and-forget from the UI.
public async static void ScrapeMultiple(List<GamesLibraryModel> list, ScrapeType scrapeType, int systemId)
{
    // instantiate instance of this class
    ScraperSearch gs = new ScraperSearch();
    // get mainwindow
    MainWindow MWindow = Application.Current.Windows.OfType<MainWindow>().FirstOrDefault();
    // popup progress dialog
    var mySettings = new MetroDialogSettings() { NegativeButtonText = "Cancel Scraping", AnimateShow = false, AnimateHide = false };
    var controller = await MWindow.ShowProgressAsync("Scraping Data", "Initialising...", true, settings: mySettings);
    controller.SetCancelable(true);
    await Task.Delay(100);
    await Task.Run(() =>
    {
        // check whether list is null and generate it if it is (so scraping/rescraping whole systems or favorites)
        if (list == null)
        {
            list = new List<GamesLibraryModel>();
            List<Game> games = new List<Game>();
            if (scrapeType == ScrapeType.Favorites || scrapeType == ScrapeType.RescrapeFavorites)
            {
                // favorites modes: all non-hidden favorites, regardless of system
                games = Game.GetGames().Where(a => a.isFavorite == true && a.hidden != true).ToList();
            }
            else
            {
                // get all games that have matching systemId and are not marked as hidden
                games = Game.GetGames(systemId).Where(a => a.hidden != true).ToList();
            }
            // populate list
            foreach (var g in games)
            {
                GamesLibraryModel glm = new GamesLibraryModel();
                glm.ID = g.gameId;
                list.Add(glm);
            }
        }
        // iterate through each game in list - match local then scrape
        int iter = 0;
        int maxCount = list.Count();
        int skip = 0;
        controller.Minimum = iter;
        controller.Maximum = maxCount;
        foreach (var game in list)
        {
            if (controller.IsCanceled)
            {
                // NOTE(review): CloseAsync() is not awaited here, and the method
                // awaits CloseAsync() again after Task.Run — confirm the dialog
                // tolerates the second close after cancellation.
                controller.CloseAsync();
                return;
            }
            iter++;
            controller.SetProgress(iter);
            // game game object from the database
            Game g = Game.GetGame(game.ID);
            string countString = iter + " of " + maxCount + " (" + skip + " skipped)";
            switch (scrapeType)
            {
                // scrape selected games (that have not been scraped yet)
                case ScrapeType.Selected:
                case ScrapeType.Favorites:
                case ScrapeType.ScrapeSystem:
                    if (g.gdbId == null || g.gdbId == 0 || Directory.Exists(AppDomain.CurrentDomain.BaseDirectory + @"Data\Games\" + g.gdbId.ToString()) == false)
                    {
                        // scraping can happen
                        DoScrape(controller, g, countString, gs, false);
                    }
                    else
                    {
                        // the game already has a valid gdbid set AND has a game directory on disc.
                        skip++;
                    }
                    break;
                // rescrape all selected games
                case ScrapeType.SelectedRescrape:
                case ScrapeType.RescrapeFavorites:
                case ScrapeType.RescrapeSystem:
                    // scraping must always happen
                    DoScrape(controller, g, countString, gs, true);
                    break;
            }
        }
    });
    await controller.CloseAsync();
    if (controller.IsCanceled)
    {
        await MWindow.ShowMessageAsync("MedLaunch Scraper", "Scraping Cancelled");
        GamesLibraryVisualHandler.RefreshGamesLibrary();
    }
    else
    {
        await MWindow.ShowMessageAsync("MedLaunch Scraper", "Scraping Completed");
        GamesLibraryVisualHandler.RefreshGamesLibrary();
    }
}
/// <summary>
/// Initializes state and raises <see cref="ScrapeStarted"/> prior to starting a scrape.
/// </summary>
/// <param name="type">The type of scrape about to start.</param>
/// <returns><c>false</c> if a scrape is already in progress; otherwise <c>true</c>.</returns>
private bool ScrapeInitiation(ScrapeType type)
{
    // Refuse to start overlapping scrapes.
    if (_scrapeInProgress)
        return false;
    _scrapeInProgress = true;

    // FIX: null-conditional invoke closes the race between the null check and the
    // call when the last subscriber detaches concurrently.
    ScrapeStarted?.Invoke(this, new ScrapeStartedEventArgs(type));

    _scrapeReport = new ScrapeReport { ScrapeStarted = DateTime.Now };
    _stopwatch = Stopwatch.StartNew(); // idiomatic create-and-start
    _scrapeCancelled = false;
    return true;
}
/// <summary>
/// Convenience overload: scrapes <paramref name="uri"/> without credentials.
/// </summary>
/// <param name="type">The scrape verb (GET/POST).</param>
/// <param name="uri">The URI to request.</param>
/// <param name="query">Query/form parameters for the request.</param>
/// <returns>The scraped page.</returns>
public static ScrapedPage SimpleScrape(ScrapeType type, string uri, NameValueCollection query)
    => SimpleScrape(type, uri, null, null, query);
/// <summary>
/// Initializes a new instance of the <see cref="ScraperActionContext"/> class.
/// </summary>
/// <param name="scrapeType">The type of scrape to perform.</param>
/// <param name="askIfMultipleResults">if set to <c>true</c> ask the user to select a movie if multiple results are found.</param>
public ScraperActionContext(ScrapeType scrapeType, bool askIfMultipleResults)
{
    // Base context only captures the scrape mode and the multi-result prompt flag.
    this.scrapeType = scrapeType;
    this.askIfMultipleResults = askIfMultipleResults;
}
/* Methods */
/// <summary>
/// Matches the given library games against the local master list, then scrapes
/// data for every matched game that still needs it (or rescrapes everything in
/// SelectedRescrape mode), driving a progress dialog throughout.
/// </summary>
/// <param name="list">The grid rows selected for scraping.</param>
/// <param name="scrapeType">Selected (skip already-scraped games) or SelectedRescrape (always scrape).</param>
// NOTE(review): async void — exceptions escaping this method are unobservable to
// callers; confirm it is only ever invoked fire-and-forget from the UI.
public async static void ScrapeGamesMultiple(List<DataGridGamesView> list, ScrapeType scrapeType)
{
    // instantiate instance of this class
    ScraperMainSearch gs = new ScraperMainSearch();
    // get mainwindow
    MainWindow MWindow = Application.Current.Windows.OfType<MainWindow>().FirstOrDefault();
    // popup progress dialog
    var mySettings = new MetroDialogSettings() { NegativeButtonText = "Cancel Scraping", AnimateShow = false, AnimateHide = false };
    var controller = await MWindow.ShowProgressAsync("Scraping Data", "Initialising...", true, settings: mySettings);
    controller.SetCancelable(true);
    await Task.Delay(100);
    await Task.Run(() =>
    {
        gs.LocalGames = new List<Game>();
        // iterate through each game, look it up and pass it to gs.LocalGames
        foreach (var game in list)
        {
            Game g = Game.GetGame(game.ID);
            if (scrapeType == ScrapeType.Selected)
            {
                if (g.gdbId == null || g.gdbId == 0 || Directory.Exists(AppDomain.CurrentDomain.BaseDirectory + @"Data\Games\" + g.gdbId.ToString()) == false)
                {
                    // scraping can happen
                    gs.LocalGames.Add(g);
                }
            }
            if (scrapeType == ScrapeType.SelectedRescrape)
            {
                // rescrape mode includes every selected game
                gs.LocalGames.Add(g);
            }
        }
        // count number of games to scan for
        int numGames = gs.LocalGames.Count;
        controller.Minimum = 0;
        controller.Maximum = numGames;
        int i = 0;
        // iterate through each local game and attempt to match it with the master list
        foreach (var g in gs.LocalGames)
        {
            if (controller.IsCanceled)
            {
                // NOTE(review): CloseAsync() is not awaited here, and the method
                // awaits CloseAsync() again after Task.Run — confirm the dialog
                // tolerates the second close after cancellation.
                controller.CloseAsync();
                return;
            }
            i++;
            controller.SetProgress(i);
            controller.SetMessage("Attempting local search match for:\n" + g.gameName + " (" + GSystem.GetSystemCode(g.systemId) + ")" + "\n(" + i + " of " + numGames + ")");
            List<ScraperMaster> results = gs.SearchGameLocal(g.gameName, g.systemId, g.gameId).ToList();
            if (results.Count == 0)
            {
                // no results returned
            }
            if (results.Count == 1)
            {
                // one result returned - add GdbId to the Game table
                Game.SetGdbId(g.gameId, results.Single().GamesDbId);
            }
        }
        /* Begin actual scraping */
        // Get all games that have a GdbId set and
        // determine if they need scraping (is json file present for them)
        var gamesTmp = gs.LocalGames;
        gs.LocalGames = new List<Game>();
        foreach (var g in gamesTmp)
        {
            // games that never got a GdbId cannot be scraped
            if (g.gdbId == null || g.gdbId == 0) { continue; }
            if (scrapeType == ScrapeType.SelectedRescrape)
            {
                gs.LocalGames.Add(g);
                continue;
            }
            // check each game directory
            if (!Directory.Exists(AppDomain.CurrentDomain.BaseDirectory + @"Data\Games\" + g.gdbId.ToString()))
            {
                // directory does not exist - scraping needed
                gs.LocalGames.Add(g);
            }
            else
            {
                // directory does exist - check whether json file is present
                if (!File.Exists(AppDomain.CurrentDomain.BaseDirectory + @"Data\Games\" + g.gdbId.ToString() + @"\" + g.gdbId.ToString() + ".json"))
                {
                    // json file is not present - scraping needed
                    gs.LocalGames.Add(g);
                }
            }
        }
        int gamesCount = gs.LocalGames.Count;
        i = 0;
        controller.Minimum = 0;
        controller.Maximum = gamesCount;
        foreach (var g in gs.LocalGames)
        {
            if (controller.IsCanceled)
            {
                // NOTE(review): same un-awaited CloseAsync as above.
                controller.CloseAsync();
                return;
            }
            // iterate through each game that requires scraping and attempt to download the data and import to database
            i++;
            controller.SetProgress(i);
            string message = "Scraping Started....\nGetting data for: " + g.gameName + " (" + GSystem.GetSystemCode(g.systemId) + ")" + "\n(" + i + " of " + gamesCount + ")\n\n";
            controller.SetMessage(message);
            // do actual scraping
            ScraperHandler sh = new ScraperHandler(g.gdbId.Value, g.gameId, false);
            sh.ScrapeGame(controller);
            GameListBuilder.UpdateFlag();
        }
    });
    await controller.CloseAsync();
    if (controller.IsCanceled)
    {
        await MWindow.ShowMessageAsync("MedLaunch Scraper", "Scraping Cancelled");
        GamesLibraryVisualHandler.RefreshGamesLibrary();
    }
    else
    {
        await MWindow.ShowMessageAsync("MedLaunch Scraper", "Scraping Completed");
        GamesLibraryVisualHandler.RefreshGamesLibrary();
    }
}
/// <summary>
/// One-shot scrape of <paramref name="uri"/> from alternating key/value strings.
/// </summary>
/// <param name="type">The scrape verb (GET/POST).</param>
/// <param name="uri">The URI to request.</param>
/// <param name="keyAndValuePairs">Alternating key/value strings for the query.</param>
/// <returns>The scraped page.</returns>
public static ScrapedPage SimpleScrape(ScrapeType type, string uri, params string[] keyAndValuePairs)
{
    // Convert the flat key/value list into a query collection, then delegate.
    return SimpleScrape(type, uri, GetNameValueCollectionFromParams(keyAndValuePairs));
}
/// <summary>
/// Scrapes <paramref name="uri"/> using alternating key/value strings for the query.
/// </summary>
/// <param name="type">The scrape verb (GET/POST).</param>
/// <param name="uri">The URI to request.</param>
/// <param name="keyAndValuePairs">Alternating key/value strings for the query.</param>
/// <returns>The scraped page.</returns>
public ScrapedPage Scrape(ScrapeType type, string uri, params string[] keyAndValuePairs)
{
    // Convert the flat key/value list into a query collection, then delegate.
    return Scrape(type, uri, GetNameValueCollectionFromParams(keyAndValuePairs));
}
/// <summary>
/// Checks whether <paramref name="uri"/> requires HTTP credentials by issuing a
/// single unauthenticated probe request and inspecting the failure.
/// </summary>
/// <param name="type">The scrape verb (GET/POST).</param>
/// <param name="uri">The URI to probe.</param>
/// <param name="keyAndValuePairs">Alternating key/value strings for the query.</param>
/// <returns><c>true</c> when the probe fails with 401 Unauthorized; otherwise <c>false</c>.</returns>
public bool RequiresCredentials(ScrapeType type, string uri, params string[] keyAndValuePairs)
{
    bool needsCredentials = false;
    try
    {
        UseCredentials = false;
        Scrape(type, uri, keyAndValuePairs);
    }
    catch (System.Net.WebException ex)
    {
        // Treat a 401 as "credentials required"; swallow other web failures.
        HttpWebResponse httpResponse = ex.Response as HttpWebResponse;
        if (httpResponse != null && httpResponse.StatusCode == HttpStatusCode.Unauthorized)
        {
            needsCredentials = true;
        }
    }
    finally
    {
        // NOTE(review): credentials are always re-enabled afterwards rather than
        // restored to their prior value — confirm callers expect that.
        UseCredentials = true;
    }
    return needsCredentials;
}
/// <summary>
/// One-shot scrape of <paramref name="uri"/>, attaching network credentials when
/// a user name is supplied.
/// </summary>
/// <param name="type">The scrape verb (GET/POST).</param>
/// <param name="uri">The URI to request.</param>
/// <param name="userName">User name; when null/empty, the request is anonymous.</param>
/// <param name="password">Password to pair with <paramref name="userName"/>.</param>
/// <param name="query">Query/form parameters for the request.</param>
/// <returns>The scraped page.</returns>
public static ScrapedPage SimpleScrape(ScrapeType type, string uri, string userName, string password, NameValueCollection query)
{
    var oneShot = new Scraper();
    if (!string.IsNullOrEmpty(userName))
    {
        // A user name was supplied: enable and attach the credentials.
        oneShot.UseCredentials = true;
        oneShot.SetNetworkCredentials(userName, password);
    }
    return oneShot.Scrape(type, uri, query);
}
/// <summary>
/// Initializes event data for a finished scrape; the end time is captured at construction.
/// </summary>
/// <param name="type">The type of scrape that ended.</param>
public ScrapeEndedEventArgs(ScrapeType type)
{
    Type = type;
    Ended = DateTime.Now; // local wall-clock time of completion
}
/// <summary>
/// Initializes event data for a scrape that is starting; the start time is captured at construction.
/// </summary>
/// <param name="type">The type of scrape that started.</param>
public ScrapeStartedEventArgs(ScrapeType type)
{
    Type = type;
    Started = DateTime.Now; // local wall-clock time of the start
}
/// <summary>
/// Executes a single scrape of <paramref name="uri"/> with the given verb and parameters.
/// </summary>
/// <param name="type">GET or POST; any other value throws <see cref="NotImplementedException"/>.</param>
/// <param name="uri">The URI to request.</param>
/// <param name="query">Query/form parameters for the request.</param>
/// <returns>The post-processed scraped page.</returns>
public ScrapedPage Scrape(ScrapeType type, string uri, NameValueCollection query)
{
    string queryString = BuildQueryString(query);
    ScrapedPage result = new ScrapedPage
    {
        QueryParameters = query,
        ScrapeType = type
    };

    if (type == ScrapeType.GET)
    {
        // Append the query string with whichever separator the URI still needs.
        uri = uri + (uri.Contains("?") ? "&" : "?") + queryString;
        result.RawStream = HttpGet(uri);
    }
    else if (type == ScrapeType.POST)
    {
        result.RawStream = HttpPost(uri, queryString);
    }
    else
    {
        throw new NotImplementedException();
    }

    if (result.RawStream == null)
    {
        throw new Exception("No data for " + uri);
    }

    result.Url = new Uri(uri);
    Referer = uri; // remember this URI as the referer for the next request
    return PostProcessData(result);
}