Beispiel #1
0
        /// <summary>
        /// Formats the search URL template for the requested scrape function and stores the result in ModifiedURL.
        /// </summary>
        /// <param name="function">Scrape function; stored on this instance and passed as format argument 0.</param>
        /// <param name="MaxPosition">Passed as format argument 2.</param>
        /// <param name="ForceCriteriaRefresh">When true, Criteria is rebuilt from Query even if it already has a value.</param>
        /// <param name="URL">Optional template used for this call instead of the instance URL.</param>
        /// <param name="ReplaceURL">When true and <paramref name="URL"/> is non-empty, the instance URL is overwritten with it.</param>
        /// <returns>The formatted URL, passed through <see cref="Uri"/> and with spaces replaced by %20.</returns>
        public String BuildSearchURL(ScrapeType function = ScrapeType.tweets, string MaxPosition = "", bool ForceCriteriaRefresh = false, String URL = "", bool ReplaceURL = false)
        {
            string template    = this.URL;
            bool   hasOverride = !string.IsNullOrEmpty(URL);

            if (hasOverride)
            {
                template = URL;
            }

            if (hasOverride && ReplaceURL)
            {
                this.URL = URL;
            }

            this.function = function;

            // Criteria is computed lazily unless the caller forces a rebuild.
            bool refreshCriteria = ForceCriteriaRefresh || string.IsNullOrEmpty(this.Criteria);
            if (refreshCriteria)
            {
                this.Criteria = Query.ToString();
            }

            ModifiedURL = string.Format(template, function.ToString(), this.Criteria, MaxPosition);

            Uri parsed = new Uri(ModifiedURL, UriKind.RelativeOrAbsolute);
            ModifiedURL = parsed.ToString().Replace(" ", "%20");

            return ModifiedURL;
        }
Beispiel #2
0
        /// <summary>
        /// Launches the instagram-scraper command-line tool for a user through CMD.EXE.
        /// </summary>
        /// <param name="username">Account to scrape; inserted verbatim into the command line.</param>
        /// <param name="scrapeType">Media types to fetch; <c>ScrapeType.All</c> omits the --media-types option.</param>
        /// <param name="destination">Target directory, passed quoted via --destination.</param>
        /// <param name="loginUsername">Optional login user; the login options are omitted only when both login values are blank.</param>
        /// <param name="loginPassword">Optional login password.</param>
        public static void RunScraper(
            string username,
            ScrapeType scrapeType,
            string destination,
            string loginUsername = null,
            string loginPassword = null)
        {
            string destinationArgument   = $"--destination \"{destination}\"";
            string mediaTypeArgument     = (scrapeType == ScrapeType.All) ? "" : $"--media-types {ScrapeTypeValues[scrapeType]}";
            string loginDetailsArguments = (string.IsNullOrWhiteSpace(loginUsername) && string.IsNullOrWhiteSpace(loginPassword)) ? "" : $"--login-user {loginUsername} --login-pass {loginPassword}";

            // NOTE(review): username and the login values are placed on the shell command line unquoted and
            // unescaped, and the full line (including the password) is echoed to the console below.
            // Confirm these values never come from untrusted input.
            string finalArguments = $" /K instagram-scraper {username} {loginDetailsArguments} {mediaTypeArgument}  " +
                                    $"{destinationArgument} --interactive --retain-username " +
                                    "--template \"{username} {mediatype} {day}d{month}m{year}y  {h};{m};{s}\" " +
                                    $"--latest-stamps \"{TIMESTAMPS_FILEPATH}\"";

            Console.WriteLine(finalArguments);

            // NOTE(review): CreateNoWindow is documented as ignored while UseShellExecute is true.
            ProcessStartInfo startInfo = new ProcessStartInfo {
                FileName        = "CMD.EXE",
                Arguments       = finalArguments,
                UseShellExecute = true,
                CreateNoWindow  = true
            };

            Process process = new Process {
                StartInfo           = startInfo,
                // Without this the Exited event is never raised and Process_Exited would never run.
                EnableRaisingEvents = true
            };

            process.Exited += Process_Exited;
            process.Start();
        }
        /// <summary>
        /// Requests <paramref name="uri"/> with HTTP GET or POST and returns the post-processed page.
        /// </summary>
        /// <param name="type">Request method; only GET and POST are handled.</param>
        /// <param name="uri">Target address. For GET, the string built from <paramref name="query"/> is appended to it.</param>
        /// <param name="query">Parameters handed to BuildQueryString; also stored on the returned page.</param>
        /// <returns>The page returned by PostProcessData.</returns>
        /// <exception cref="NotImplementedException"><paramref name="type"/> is neither GET nor POST.</exception>
        /// <exception cref="Exception">The request yielded no stream.</exception>
        public ScrapedPage Scrape(ScrapeType type, string uri, NameValueCollection query)
        {
            ScrapedPage page = new ScrapedPage();
            string      qs   = BuildQueryString(query);

            page.QueryParameters = query;
            page.ScrapeType      = type;
            switch (type)
            {
            case ScrapeType.GET:
                // Skip the separator entirely when there is nothing to append, so the
                // request (and the stored Referer) does not end in a dangling "?" or "&".
                if (!string.IsNullOrEmpty(qs))
                {
                    uri = uri.Contains("?") ? (uri + "&" + qs) : (uri + "?" + qs);
                }
                page.RawStream = HttpGet(uri);
                break;

            case ScrapeType.POST:
                page.RawStream = HttpPost(uri, qs);
                break;

            default:
                throw new NotImplementedException();
            }

            if (page.RawStream == null)
            {
                throw new Exception("No data for " + uri);
            }

            page.Url = new Uri(uri);
            // The fetched address is kept as the Referer field for later use.
            Referer  = uri;

            page = PostProcessData(page);
            return(page);
        }
Beispiel #4
0
 /// <summary>
 /// Creates an <see cref="ImageScrapeActionContext"/>, forwarding the shared settings to the base constructor.
 /// </summary>
 /// <param name="imageScrapeType">Image scrape type stored on this instance.</param>
 /// <param name="scrapeType">Scrape type forwarded to the base constructor.</param>
 /// <param name="askIfMultipleResults">Flag forwarded to the base constructor.</param>
 public ImageScrapeActionContext(ImageScrapeType imageScrapeType, ScrapeType scrapeType, bool askIfMultipleResults)
     : base(scrapeType, askIfMultipleResults)
 {
     this.imageScrapeType = imageScrapeType;
 }
 /// <summary>
 /// Creates an <see cref="ImageScrapeActionContext"/>, forwarding the shared settings to the base constructor.
 /// </summary>
 /// <param name="imageScrapeType">Image scrape type stored on this instance.</param>
 /// <param name="scrapeType">Scrape type forwarded to the base constructor.</param>
 /// <param name="askIfMultipleResults">Flag forwarded to the base constructor.</param>
 public ImageScrapeActionContext(ImageScrapeType imageScrapeType, ScrapeType scrapeType, bool askIfMultipleResults)
     : base(scrapeType, askIfMultipleResults)
 {
     this.imageScrapeType = imageScrapeType;
 }
Beispiel #6
0
 /// <summary>
 /// Creates a <see cref="MovieImageScraperActionContext"/> for a single movie.
 /// </summary>
 /// <param name="dbMovie">Movie stored on this instance.</param>
 /// <param name="imageScrapeType">Image scrape type forwarded to the base constructor.</param>
 /// <param name="scrapeType">Scrape type forwarded to the base constructor.</param>
 /// <param name="askIfMultipleResults">Flag forwarded to the base constructor.</param>
 public MovieImageScraperActionContext(Structures.DBMovie dbMovie, ImageScrapeType imageScrapeType, ScrapeType scrapeType, bool askIfMultipleResults)
     : base(imageScrapeType, scrapeType, askIfMultipleResults)
 {
     this.dbMovie = dbMovie;
 }
 /// <summary>
 /// Creates a <see cref="MovieImageScraperActionContext"/> for a single movie.
 /// </summary>
 /// <param name="dbMovie">Movie stored on this instance.</param>
 /// <param name="imageScrapeType">Image scrape type forwarded to the base constructor.</param>
 /// <param name="scrapeType">Scrape type forwarded to the base constructor.</param>
 /// <param name="askIfMultipleResults">Flag forwarded to the base constructor.</param>
 public MovieImageScraperActionContext(Structures.DBMovie dbMovie, ImageScrapeType imageScrapeType, ScrapeType scrapeType, bool askIfMultipleResults)
     : base(imageScrapeType, scrapeType, askIfMultipleResults)
 {
     this.dbMovie = dbMovie;
 }
 /// <summary>
 /// Creates a <see cref="MovieInfoScraperActionContext"/> for a single movie.
 /// </summary>
 /// <param name="dbMovie">Movie stored on this instance.</param>
 /// <param name="scrapeType">Scrape type forwarded to the base constructor.</param>
 /// <param name="askIfMultipleResults">Flag forwarded to the base constructor.</param>
 /// <param name="options">Scrape options stored on this instance.</param>
 public MovieInfoScraperActionContext(Structures.DBMovie dbMovie, ScrapeType scrapeType, bool askIfMultipleResults, Structures.ScrapeOptions options)
     : base(scrapeType, askIfMultipleResults)
 {
     this.options = options;
     this.dbMovie = dbMovie;
 }
Beispiel #9
0
 /// <summary>
 /// Creates a <see cref="MovieInfoScraperActionContext"/> for a single movie.
 /// </summary>
 /// <param name="dbMovie">Movie stored on this instance.</param>
 /// <param name="scrapeType">Scrape type forwarded to the base constructor.</param>
 /// <param name="askIfMultipleResults">Flag forwarded to the base constructor.</param>
 /// <param name="options">Scrape options stored on this instance.</param>
 public MovieInfoScraperActionContext(Structures.DBMovie dbMovie, ScrapeType scrapeType, bool askIfMultipleResults, Structures.ScrapeOptions options)
     : base(scrapeType, askIfMultipleResults)
 {
     this.options = options;
     this.dbMovie = dbMovie;
 }
        /// <summary>
        /// Performs a one-off scrape with a fresh <see cref="Scraper"/>, optionally using network credentials.
        /// </summary>
        /// <param name="type">Scrape type passed to the scraper.</param>
        /// <param name="uri">Address passed to the scraper.</param>
        /// <param name="userName">User name; credentials are applied only when this is non-empty.</param>
        /// <param name="password">Password passed along with <paramref name="userName"/>.</param>
        /// <param name="query">Query parameters passed to the scraper.</param>
        /// <returns>The page returned by the scraper.</returns>
        public static ScrapedPage SimpleScrape(ScrapeType type, string uri, string userName, string password, NameValueCollection query)
        {
            var  scraper        = new Scraper();
            bool hasCredentials = !string.IsNullOrEmpty(userName);

            if (hasCredentials)
            {
                scraper.UseCredentials = true;
                scraper.SetNetworkCredentials(userName, password);
            }

            ScrapedPage result = scraper.Scrape(type, uri, query);
            return(result);
        }
        /// <summary>
        /// Probes whether a request made without credentials is rejected with HTTP 401.
        /// </summary>
        /// <param name="type">Scrape type passed to Scrape.</param>
        /// <param name="uri">Address passed to Scrape.</param>
        /// <param name="keyAndValuePairs">Key and value pairs passed to Scrape.</param>
        /// <returns>
        /// true when the request fails with 401 Unauthorized; false when it succeeds or fails with
        /// any other <see cref="WebException"/>.
        /// </returns>
        /// <remarks>UseCredentials is set to true on exit, whatever its value was before the call.</remarks>
        public bool RequiresCredentials(ScrapeType type, string uri, params string[] keyAndValuePairs)
        {
            try
            {
                UseCredentials = false;
                Scrape(type, uri, keyAndValuePairs);
            }
            catch (System.Net.WebException ex)
            {
                // Close the error response once inspected; leaving it open keeps its connection held.
                using (WebResponse errorResponse = ex.Response)
                {
                    HttpWebResponse response = errorResponse as HttpWebResponse;
                    if (response != null && response.StatusCode == HttpStatusCode.Unauthorized)
                    {
                        return(true);
                    }
                }
            }
            finally
            {
                UseCredentials = true;
            }

            return(false);
        }
        /// <summary>
        /// Finishes a scrape: stops the stopwatch, completes the report, raises ScrapeEnded
        /// and clears the in-progress flag.
        /// </summary>
        /// <param name="type">Scrape type passed to the ScrapeEnded event arguments.</param>
        private void ScrapeCleanup(ScrapeType type)
        {
            _stopwatch.Stop();

            _scrapeReport.ScrapeEnded = DateTime.Now;
            _scrapeReport.TimeTaken   = _stopwatch.Elapsed;

            var handler = ScrapeEnded;
            if (handler != null)
            {
                handler(this, new ScrapeEndedEventArgs(type));
            }

            // Cleared last, after subscribers have been notified.
            _scrapeInProgress = false;
        }
 /// <summary>
 /// Convenience overload that calls the credential-accepting SimpleScrape with a null user name and password.
 /// </summary>
 /// <param name="type">Scrape type passed through.</param>
 /// <param name="uri">Address passed through.</param>
 /// <param name="query">Query parameters passed through.</param>
 /// <returns>The page returned by the credential-accepting overload.</returns>
 public static ScrapedPage SimpleScrape(ScrapeType type, string uri, NameValueCollection query)
 {
     ScrapedPage page = SimpleScrape(type, uri, null, null, query);
     return(page);
 }
Beispiel #14
0
        /// <summary>
        /// Scrapes a batch of games from the games library behind a cancellable progress dialog
        /// (either only the games not yet scraped, or a re-scrape of all of them).
        /// </summary>
        /// <param name="list">Games to process. When null, the batch is built from the favorites or from <paramref name="systemId"/>, excluding hidden games.</param>
        /// <param name="scrapeType">Selects how a null <paramref name="list"/> is populated and whether already-scraped games are skipped or re-scraped.</param>
        /// <param name="systemId">System whose games are loaded when <paramref name="list"/> is null and the scrape type is not a favorites type.</param>
        public async static void ScrapeMultiple(List <GamesLibraryModel> list, ScrapeType scrapeType, int systemId)
        {
            // instantiate instance of this class
            ScraperSearch gs = new ScraperSearch();

            // get mainwindow
            MainWindow MWindow = Application.Current.Windows.OfType <MainWindow>().FirstOrDefault();

            // popup progress dialog
            var mySettings = new MetroDialogSettings()
            {
                NegativeButtonText = "Cancel Scraping",
                AnimateShow        = false,
                AnimateHide        = false
            };
            var controller = await MWindow.ShowProgressAsync("Scraping Data", "Initialising...", true, settings : mySettings);

            controller.SetCancelable(true);
            await Task.Delay(100);

            await Task.Run(() =>
            {
                // Work on a local so the captured parameter is never reassigned from the worker.
                List <GamesLibraryModel> batch = list;

                // no list supplied: build it (scraping/rescraping whole systems or favorites)
                if (batch == null)
                {
                    List <Game> games;
                    if (scrapeType == ScrapeType.Favorites || scrapeType == ScrapeType.RescrapeFavorites)
                    {
                        games = Game.GetGames().Where(a => a.isFavorite == true && a.hidden != true).ToList();
                    }
                    else
                    {
                        // get all games that have matching systemId and are not marked as hidden
                        games = Game.GetGames(systemId).Where(a => a.hidden != true).ToList();
                    }

                    // populate list
                    batch = new List <GamesLibraryModel>();
                    foreach (var g in games)
                    {
                        GamesLibraryModel glm = new GamesLibraryModel();
                        glm.ID = g.gameId;
                        batch.Add(glm);
                    }
                }

                // iterate through each game in list - match local then scrape
                int iter           = 0;
                int maxCount       = batch.Count;
                int skip           = 0;
                controller.Minimum = iter;
                controller.Maximum = maxCount;
                foreach (var game in batch)
                {
                    if (controller.IsCanceled)
                    {
                        // Just stop working. The dialog is closed exactly once, by the awaited
                        // CloseAsync after Task.Run returns; closing it here as well would issue
                        // a second, unawaited close on the same dialog.
                        return;
                    }

                    iter++;
                    controller.SetProgress(iter);
                    // get game object from the database
                    Game g = Game.GetGame(game.ID);

                    string countString = iter + " of " + maxCount + " (" + skip + " skipped)";

                    switch (scrapeType)
                    {
                    // scrape selected games (that have not been scraped yet)
                    case ScrapeType.Selected:
                    case ScrapeType.Favorites:
                    case ScrapeType.ScrapeSystem:
                        if (g.gdbId == null || g.gdbId == 0 || Directory.Exists(AppDomain.CurrentDomain.BaseDirectory + @"Data\Games\" + g.gdbId.ToString()) == false)
                        {
                            // scraping can happen
                            DoScrape(controller, g, countString, gs, false);
                        }
                        else
                        {
                            // the game already has a valid gdbid set AND has a game directory on disc.
                            skip++;
                        }
                        break;

                    // rescrape all selected games
                    case ScrapeType.SelectedRescrape:
                    case ScrapeType.RescrapeFavorites:
                    case ScrapeType.RescrapeSystem:
                        // scraping must always happen
                        DoScrape(controller, g, countString, gs, true);
                        break;
                    }
                }
            });

            await controller.CloseAsync();

            // Both outcomes show a message and then refresh the library; only the text differs.
            string outcome = controller.IsCanceled ? "Scraping Cancelled" : "Scraping Completed";
            await MWindow.ShowMessageAsync("MedLaunch Scraper", outcome);

            GamesLibraryVisualHandler.RefreshGamesLibrary();
        }
        /// <summary>
        /// Prepares state for a new scrape: marks it in progress, raises ScrapeStarted,
        /// creates a fresh report and starts a new stopwatch.
        /// </summary>
        /// <param name="type">Scrape type passed to the ScrapeStarted event arguments.</param>
        /// <returns>false when a scrape is already in progress (nothing is changed); otherwise true.</returns>
        private bool ScrapeInitiation(ScrapeType type)
        {
            if (_scrapeInProgress)
            {
                return false;
            }

            _scrapeInProgress = true;

            var handler = ScrapeStarted;
            if (handler != null)
            {
                handler(this, new ScrapeStartedEventArgs(type));
            }

            // The report is created after subscribers have been notified.
            var report = new ScrapeReport();
            report.ScrapeStarted = DateTime.Now;
            _scrapeReport = report;

            _stopwatch = new Stopwatch();
            _stopwatch.Start();

            _scrapeCancelled = false;

            return true;
        }
Beispiel #16
0
 /// <summary>
 /// Convenience overload that calls the credential-accepting SimpleScrape with a null user name and password.
 /// </summary>
 /// <param name="type">Scrape type passed through.</param>
 /// <param name="uri">Address passed through.</param>
 /// <param name="query">Query parameters passed through.</param>
 /// <returns>The page returned by the credential-accepting overload.</returns>
 public static ScrapedPage SimpleScrape(ScrapeType type, string uri, NameValueCollection query)
 {
     ScrapedPage page = SimpleScrape(type, uri, null, null, query);
     return page;
 }
Beispiel #17
0
 /// <summary>
 /// Creates a <see cref="ScraperActionContext"/> holding the given scrape settings.
 /// </summary>
 /// <param name="scrapeType">Scrape type stored on this instance.</param>
 /// <param name="askIfMultipleResults">Flag stored on this instance.</param>
 public ScraperActionContext(ScrapeType scrapeType, bool askIfMultipleResults)
 {
     this.askIfMultipleResults = askIfMultipleResults;
     this.scrapeType = scrapeType;
 }
Beispiel #18
0
        /* Methods */

        /// <summary>
        /// Matches the given games against the local master list and then scrapes those that
        /// still need data, behind a cancellable progress dialog.
        /// </summary>
        /// <param name="list">Grid rows whose ID identifies the games to process.</param>
        /// <param name="scrapeType">
        /// Selected processes only games without a GamesDb id or without a data directory;
        /// SelectedRescrape processes every game. Any other value results in an empty batch.
        /// </param>
        public async static void ScrapeGamesMultiple(List <DataGridGamesView> list, ScrapeType scrapeType)
        {
            // instantiate instance of this class
            ScraperMainSearch gs = new ScraperMainSearch();

            // get mainwindow
            MainWindow MWindow = Application.Current.Windows.OfType <MainWindow>().FirstOrDefault();

            // popup progress dialog
            var mySettings = new MetroDialogSettings()
            {
                NegativeButtonText = "Cancel Scraping",
                AnimateShow        = false,
                AnimateHide        = false
            };
            var controller = await MWindow.ShowProgressAsync("Scraping Data", "Initialising...", true, settings : mySettings);

            controller.SetCancelable(true);
            await Task.Delay(100);

            await Task.Run(() =>
            {
                gs.LocalGames = new List <Game>();

                // iterate through each game, look it up and pass it to gs.LocalGames
                foreach (var game in list)
                {
                    Game g = Game.GetGame(game.ID);

                    if (scrapeType == ScrapeType.Selected)
                    {
                        if (g.gdbId == null || g.gdbId == 0 || Directory.Exists(AppDomain.CurrentDomain.BaseDirectory + @"Data\Games\" + g.gdbId.ToString()) == false)
                        {
                            // scraping can happen
                            gs.LocalGames.Add(g);
                        }
                    }

                    if (scrapeType == ScrapeType.SelectedRescrape)
                    {
                        gs.LocalGames.Add(g);
                    }
                }

                // count number of games to scan for
                int numGames       = gs.LocalGames.Count;
                controller.Minimum = 0;
                controller.Maximum = numGames;
                int i = 0;
                // iterate through each local game and attempt to match it with the master list
                foreach (var g in gs.LocalGames)
                {
                    if (controller.IsCanceled)
                    {
                        // Just stop working. The dialog is closed exactly once, by the awaited
                        // CloseAsync after Task.Run returns; closing it here as well would issue
                        // a second, unawaited close on the same dialog.
                        return;
                    }
                    i++;
                    controller.SetProgress(i);
                    controller.SetMessage("Attempting local search match for:\n" + g.gameName + " (" + GSystem.GetSystemCode(g.systemId) + ")" + "\n(" + i + " of " + numGames + ")");
                    List <ScraperMaster> results = gs.SearchGameLocal(g.gameName, g.systemId, g.gameId).ToList();

                    // Only an unambiguous single match is recorded; zero or several results are ignored.
                    if (results.Count == 1)
                    {
                        // one result returned - add GdbId to the Game table
                        // NOTE(review): SetGdbId receives only the game id, so the in-memory g.gdbId
                        // presumably stays unchanged and a game matched here would be skipped by the
                        // gdbId check below until a later run - confirm.
                        Game.SetGdbId(g.gameId, results.Single().GamesDbId);
                    }
                }

                /* Begin actual scraping */

                // Get all games that have a GdbId set and determine if they need scraping (is json file present for them)
                var gamesTmp  = gs.LocalGames;
                gs.LocalGames = new List <Game>();
                foreach (var g in gamesTmp)
                {
                    if (g.gdbId == null || g.gdbId == 0)
                    {
                        continue;
                    }

                    if (scrapeType == ScrapeType.SelectedRescrape)
                    {
                        gs.LocalGames.Add(g);
                        continue;
                    }

                    // check each game directory
                    if (!Directory.Exists(AppDomain.CurrentDomain.BaseDirectory + @"Data\Games\" + g.gdbId.ToString()))
                    {
                        // directory does not exist - scraping needed
                        gs.LocalGames.Add(g);
                    }
                    else
                    {
                        // directory does exist - check whether json file is present
                        if (!File.Exists(AppDomain.CurrentDomain.BaseDirectory + @"Data\Games\" + g.gdbId.ToString() + @"\" + g.gdbId.ToString() + ".json"))
                        {
                            // json file is not present - scraping needed
                            gs.LocalGames.Add(g);
                        }
                    }
                }

                int gamesCount = gs.LocalGames.Count;
                i = 0;
                controller.Minimum = 0;
                controller.Maximum = gamesCount;
                foreach (var g in gs.LocalGames)
                {
                    if (controller.IsCanceled)
                    {
                        // See above: the single awaited CloseAsync after Task.Run closes the dialog.
                        return;
                    }

                    // iterate through each game that requires scraping and attempt to download the data and import to database
                    i++;
                    controller.SetProgress(i);
                    string message = "Scraping Started....\nGetting data for: " + g.gameName + " (" + GSystem.GetSystemCode(g.systemId) + ")" + "\n(" + i + " of " + gamesCount + ")\n\n";
                    controller.SetMessage(message);

                    // do actual scraping
                    ScraperHandler sh = new ScraperHandler(g.gdbId.Value, g.gameId, false);
                    sh.ScrapeGame(controller);
                    GameListBuilder.UpdateFlag();
                }
            });

            await controller.CloseAsync();

            // Both outcomes show a message and then refresh the library; only the text differs.
            string outcome = controller.IsCanceled ? "Scraping Cancelled" : "Scraping Completed";
            await MWindow.ShowMessageAsync("MedLaunch Scraper", outcome);

            GamesLibraryVisualHandler.RefreshGamesLibrary();
        }
        /// <summary>
        /// Convenience overload that converts key/value strings to a collection and calls the
        /// collection-based SimpleScrape.
        /// </summary>
        /// <param name="type">Scrape type passed through.</param>
        /// <param name="uri">Address passed through.</param>
        /// <param name="keyAndValuePairs">Strings handed to GetNameValueCollectionFromParams.</param>
        /// <returns>The page returned by the collection-based overload.</returns>
        public static ScrapedPage SimpleScrape(ScrapeType type, string uri, params string[] keyAndValuePairs)
        {
            NameValueCollection parameters = GetNameValueCollectionFromParams(keyAndValuePairs);
            ScrapedPage         page       = SimpleScrape(type, uri, parameters);

            return(page);
        }
Beispiel #20
0
 /// <summary>
 /// Convenience overload that converts key/value strings to a collection and calls the
 /// collection-based Scrape.
 /// </summary>
 /// <param name="type">Scrape type passed through.</param>
 /// <param name="uri">Address passed through.</param>
 /// <param name="keyAndValuePairs">Strings handed to GetNameValueCollectionFromParams.</param>
 /// <returns>The page returned by the collection-based overload.</returns>
 public ScrapedPage Scrape(ScrapeType type, string uri, params string[] keyAndValuePairs)
 {
     NameValueCollection parameters = GetNameValueCollectionFromParams(keyAndValuePairs);
     ScrapedPage page = Scrape(type, uri, parameters);
     return page;
 }
Beispiel #21
0
        /// <summary>
        /// Probes whether a request made without credentials is rejected with HTTP 401.
        /// </summary>
        /// <param name="type">Scrape type passed to Scrape.</param>
        /// <param name="uri">Address passed to Scrape.</param>
        /// <param name="keyAndValuePairs">Key and value pairs passed to Scrape.</param>
        /// <returns>
        /// true when the request fails with 401 Unauthorized; false when it succeeds or fails with
        /// any other <see cref="WebException"/>.
        /// </returns>
        /// <remarks>UseCredentials is set to true on exit, whatever its value was before the call.</remarks>
        public bool RequiresCredentials(ScrapeType type, string uri, params string[] keyAndValuePairs)
        {
            try
            {
                UseCredentials = false;
                Scrape(type, uri, keyAndValuePairs);
            }
            catch (System.Net.WebException ex)
            {
                // Close the error response once inspected; leaving it open keeps its connection held.
                using (WebResponse errorResponse = ex.Response)
                {
                    HttpWebResponse response = errorResponse as HttpWebResponse;
                    if (response != null && response.StatusCode == HttpStatusCode.Unauthorized)
                    {
                        return true;
                    }
                }
            }
            finally
            {
                UseCredentials = true;
            }

            return false;
        }
Beispiel #22
0
 /// <summary>
 /// Performs a one-off scrape with a fresh <see cref="Scraper"/>, optionally using network credentials.
 /// </summary>
 /// <param name="type">Scrape type passed to the scraper.</param>
 /// <param name="uri">Address passed to the scraper.</param>
 /// <param name="userName">User name; credentials are applied only when this is non-empty.</param>
 /// <param name="password">Password passed along with <paramref name="userName"/>.</param>
 /// <param name="query">Query parameters passed to the scraper.</param>
 /// <returns>The page returned by the scraper.</returns>
 public static ScrapedPage SimpleScrape(ScrapeType type, string uri, string userName, string password, NameValueCollection query)
 {
     var scraper = new Scraper();
     bool hasCredentials = !string.IsNullOrEmpty(userName);

     if (hasCredentials)
     {
         scraper.UseCredentials = true;
         scraper.SetNetworkCredentials(userName, password);
     }

     ScrapedPage result = scraper.Scrape(type, uri, query);
     return result;
 }
Beispiel #23
0
 /// <summary>
 /// Creates event data for a finished scrape, stamped with the current local time.
 /// </summary>
 /// <param name="type">Scrape type stored in Type.</param>
 public ScrapeEndedEventArgs(ScrapeType type)
 {
     this.Type = type;
     this.Ended = DateTime.Now;
 }
Beispiel #24
0
 /// <summary>
 /// Creates event data for a scrape that is starting, stamped with the current local time.
 /// </summary>
 /// <param name="type">Scrape type stored in Type.</param>
 public ScrapeStartedEventArgs(ScrapeType type)
 {
     this.Type = type;
     this.Started = DateTime.Now;
 }
Beispiel #25
0
        /// <summary>
        /// Requests <paramref name="uri"/> with HTTP GET or POST and returns the post-processed page.
        /// </summary>
        /// <param name="type">Request method; only GET and POST are handled.</param>
        /// <param name="uri">Target address. For GET, the string built from <paramref name="query"/> is appended to it.</param>
        /// <param name="query">Parameters handed to BuildQueryString; also stored on the returned page.</param>
        /// <returns>The page returned by PostProcessData.</returns>
        /// <exception cref="NotImplementedException"><paramref name="type"/> is neither GET nor POST.</exception>
        /// <exception cref="Exception">The request yielded no stream.</exception>
        public ScrapedPage Scrape(ScrapeType type, string uri, NameValueCollection query)
        {
            ScrapedPage page = new ScrapedPage();
            string qs = BuildQueryString(query);
            page.QueryParameters = query;
            page.ScrapeType = type;
            switch (type)
            {
                case ScrapeType.GET:
                    // Skip the separator entirely when there is nothing to append, so the
                    // request (and the stored Referer) does not end in a dangling "?" or "&".
                    if (!string.IsNullOrEmpty(qs))
                    {
                        uri = uri.Contains("?") ? (uri + "&" + qs) : (uri + "?" + qs);
                    }
                    page.RawStream = HttpGet(uri);
                    break;
                case ScrapeType.POST:
                    page.RawStream = HttpPost(uri, qs);
                    break;
                default:
                    throw new NotImplementedException();
            }

            if (page.RawStream == null)
            {
                throw new Exception("No data for " + uri);
            }

            page.Url = new Uri(uri);
            // The fetched address is kept as the Referer field for later use.
            Referer = uri;

            page = PostProcessData(page);
            return page;
        }
 /// <summary>
 /// Creates a <see cref="ScraperActionContext"/> holding the given scrape settings.
 /// </summary>
 /// <param name="scrapeType">Scrape type stored on this instance.</param>
 /// <param name="askIfMultipleResults">Flag stored on this instance.</param>
 public ScraperActionContext(ScrapeType scrapeType, bool askIfMultipleResults)
 {
     this.askIfMultipleResults = askIfMultipleResults;
     this.scrapeType = scrapeType;
 }