Ejemplo n.º 1
0
        /// <summary>
        /// Master handler for parsing data from the mobygames website.
        /// Downloads the game's main page and, depending on the global scrape settings,
        /// its cover-art and screenshots pages, and copies what it finds into <paramref name="o"/>.
        /// </summary>
        /// <param name="o">Scrape result to populate; the same instance is returned.</param>
        /// <param name="masterrecord">Supplies the MOBYPlatformAlias and MOBYAlias used to build the page URLs.</param>
        /// <param name="controller">Progress dialog; polled for cancellation while parsing.</param>
        /// <param name="order">
        /// With <c>ScraperOrder.Primary</c> existing text fields are overwritten; otherwise only
        /// fields that are still null (or, for genres, empty) are filled.
        /// </param>
        /// <param name="message">Not used by this method.</param>
        /// <returns><paramref name="o"/>, updated with whatever could be parsed.</returns>
        public static ScrapedGameObjectWeb PullWebpageData(ScrapedGameObjectWeb o, ScraperMaster masterrecord, ProgressDialogController controller, ScraperOrder order, string message)
        {
            const string baseurl = "http://www.mobygames.com/game/";
            string gamePath = masterrecord.MOBYPlatformAlias + "/" + masterrecord.MOBYAlias;

            // query the main game page
            string initialPage = ReturnWebpage(baseurl, gamePath, 10000);

            // response error checking
            if (IsFailedResponse(initialPage))
            {
                return o;
            }

            GlobalSettings gs = GlobalSettings.GetGlobals();

            // convert page string to htmldoc
            HtmlDocument doc = new HtmlDocument();
            doc.LoadHtml(initialPage);

            // get core information
            List<HtmlNode> divs = SelectNodeList(doc.DocumentNode, "//div");

            for (int i = 0; i < divs.Count; i++)
            {
                if (controller.IsCanceled == true)
                {
                    controller.CloseAsync();
                    return o;
                }

                // A div whose text equals its markup is treated as a heading and the div that
                // follows it as the value; a heading in last position has no value to read.
                if (divs[i].InnerText != divs[i].InnerHtml || i + 1 >= divs.Count)
                {
                    continue;
                }

                HtmlNode valueNode = divs[i + 1];

                switch (divs[i].InnerHtml)
                {
                case "Published by":
                    if (order == ScraperOrder.Primary || o.Data.Publisher == null)
                    {
                        o.Data.Publisher = WebUtility.HtmlDecode(valueNode.InnerText);
                    }
                    break;

                case "Developed by":
                    if (order == ScraperOrder.Primary || o.Data.Developer == null)
                    {
                        o.Data.Developer = WebUtility.HtmlDecode(valueNode.InnerText);
                    }
                    break;

                case "Released":
                    if (order == ScraperOrder.Primary || o.Data.Released == null)
                    {
                        o.Data.Released = WebUtility.HtmlDecode(valueNode.InnerText);
                    }
                    break;

                case "ESRB Rating":
                    if (order == ScraperOrder.Primary || o.Data.ESRB == null)
                    {
                        o.Data.ESRB = WebUtility.HtmlDecode(valueNode.InnerText);
                    }
                    break;

                case "Genre":
                    if (o.Data.Genres == null)
                    {
                        o.Data.Genres = new List<string>();
                    }

                    // Decide once, before adding anything: testing Count inside the loop would
                    // let a non-primary scrape store only the first genre of the list.
                    if (order == ScraperOrder.Primary || o.Data.Genres.Count == 0)
                    {
                        foreach (string genre in WebUtility.HtmlDecode(valueNode.InnerText).Split('/'))
                        {
                            string trimmed = genre.Trim();
                            if (trimmed.Length > 0)
                            {
                                o.Data.Genres.Add(trimmed);
                            }
                        }
                    }
                    break;
                }
            }

            // get the game description
            if (initialPage.Contains("<h2>Description</h2>")
                && (order == ScraperOrder.Primary || o.Data.Overview == null))
            {
                // take everything between the heading and the next '<div class=' and strip the tags
                string[] arr1        = initialPage.Split(new string[] { "<h2>Description</h2>" }, StringSplitOptions.None);
                string[] arr2        = arr1[1].Split(new string[] { "<div class=" }, StringSplitOptions.None);
                string   description = WebUtility.HtmlDecode(Regex.Replace(arr2[0], @"<[^>]*>", String.Empty));
                o.Data.Overview = description;
            }

            // get alternate titles
            if (initialPage.Contains("<h2>Alternate Titles</h2>")
                && (order == ScraperOrder.Primary || o.Data.AlternateTitles == null))
            {
                // flatten the <ul> that follows the heading into one title per line
                string[] arr3 = initialPage.Split(new string[] { "<h2>Alternate Titles</h2>" }, StringSplitOptions.None);
                string   s3   = arr3[1].Replace("\n\r", "").Replace("\r\n", "").Replace("\r", "").Replace("\n", "").Replace("<ul>", "");
                string[] arr4 = s3.Split(new string[] { "</ul>" }, StringSplitOptions.None);
                string   s4   = arr4[0].Trim();
                string   s5   = s4.Replace("<li>", "").Replace("</li>", "\n");

                if (o.Data.AlternateTitles == null)
                {
                    o.Data.AlternateTitles = new List<string>();
                }
                foreach (string s in s5.Split('\n'))
                {
                    if (s != "")
                    {
                        o.Data.AlternateTitles.Add(WebUtility.HtmlDecode(s.Replace(" -- <em>Japanese spelling</em>", "")));
                    }
                }
            }

            if (gs.scrapeBoxart == true || gs.scrapeMedia == true)
            {
                // cover art - query the coverart page
                string coverPage = ReturnWebpage(baseurl, gamePath + "/cover-art", 10000);

                if (!IsFailedResponse(coverPage)
                    && !coverPage.Contains("There are no covers for the selected platform."))
                {
                    // convert page string to htmldoc
                    HtmlDocument cDoc = new HtmlDocument();
                    cDoc.LoadHtml(coverPage);

                    // These XPath expressions start with "//", so they match across the whole
                    // document regardless of the node they are run on. Querying once at document
                    // level therefore yields the same caption/link lists as re-running them for
                    // every thumbnail div would.
                    bool hasThumbnails        = SelectNodeList(cDoc.DocumentNode, "//div[@class ='thumbnail']").Count > 0;
                    List<HtmlNode> captions   = SelectNodeList(cDoc.DocumentNode, "//div[@class ='thumbnail-cover-caption']");
                    List<HtmlNode> coverLinks = SelectNodeList(cDoc.DocumentNode, "//a[@class ='thumbnail-cover']");

                    // captions and links are paired by position; never read past the shorter list
                    int pairCount = hasThumbnails ? Math.Min(captions.Count, coverLinks.Count) : 0;

                    bool frontFound = false;
                    bool backFound  = false;
                    bool mediaFound = false;

                    for (int i = 0; i < pairCount; i++)
                    {
                        if (controller.IsCanceled == true)
                        {
                            controller.CloseAsync();
                            return o;
                        }

                        string t        = captions[i].InnerText.Trim().ToLower();
                        string mediaUrl = BuildMobyImageUrl(coverLinks[i]);
                        if (mediaUrl == null)
                        {
                            // link carries no style attribute to read the image path from
                            continue;
                        }

                        // Only the first image of each kind is considered, and it is stored only
                        // when nothing of that kind has been collected already.
                        if (frontFound == false && t == "front cover")
                        {
                            if ((o.FrontCovers == null || o.FrontCovers.Count == 0) && gs.scrapeBoxart == true)
                            {
                                o.FrontCovers = new List<string>();
                                o.FrontCovers.Add(mediaUrl);
                            }
                            frontFound = true;
                        }
                        if (backFound == false && t == "back cover")
                        {
                            if ((o.BackCovers == null || o.BackCovers.Count == 0) && gs.scrapeBoxart == true)
                            {
                                o.BackCovers = new List<string>();
                                o.BackCovers.Add(mediaUrl);
                            }
                            backFound = true;
                        }
                        if (mediaFound == false && t == "media")
                        {
                            if ((o.Medias == null || o.Medias.Count == 0) && gs.scrapeMedia == true)
                            {
                                o.Medias = new List<string>();
                                o.Medias.Add(mediaUrl);
                            }
                            mediaFound = true;
                        }

                        if (mediaFound == true && backFound == true && frontFound == true)
                        {
                            break;
                        }
                    }
                }
            }

            if (gs.scrapeScreenshots == true)
            {
                // screenshots - query the screenshots page
                string screenPage = ReturnWebpage(baseurl, gamePath + "/screenshots", 10000);

                if (!IsFailedResponse(screenPage)
                    && !screenPage.Contains("There are no user screenshots on file"))
                {
                    // convert page string to htmldoc
                    HtmlDocument sDoc = new HtmlDocument();
                    sDoc.LoadHtml(screenPage);

                    List<HtmlNode> screens = SelectNodeList(sDoc.DocumentNode, "//a[@class ='thumbnail-image']");
                    if (o.Screenshots == null)
                    {
                        o.Screenshots = new List<string>();
                    }

                    int added = 0;
                    foreach (HtmlNode screen in screens)
                    {
                        if (added >= gs.maxScreenshots)
                        {
                            break;
                        }

                        string url = BuildMobyImageUrl(screen);
                        if (url == null)
                        {
                            continue;
                        }
                        o.Screenshots.Add(url);
                        added++;
                    }
                }
            }

            return o;
        }

        /// <summary>
        /// True when a downloaded page is missing or is one of the error markers checked for by the scraper.
        /// </summary>
        private static bool IsFailedResponse(string page)
        {
            return page == null || page == "ERROR - Server Error" || page == "ERROR - Not Found";
        }

        /// <summary>
        /// Runs an XPath query and always returns a list; SelectNodes yields null when nothing matches.
        /// </summary>
        private static List<HtmlNode> SelectNodeList(HtmlNode root, string xpath)
        {
            IEnumerable<HtmlNode> found = root.SelectNodes(xpath);
            return found == null ? new List<HtmlNode>() : found.ToList();
        }

        /// <summary>
        /// Builds an image URL from a thumbnail link's inline "background-image:url(...)" style,
        /// swapping the "/s/" path segment for "/l/". Returns null when the node has no style attribute.
        /// </summary>
        private static string BuildMobyImageUrl(HtmlNode node)
        {
            var style = node.Attributes["style"];
            if (style == null || style.Value == null)
            {
                return null;
            }

            string path = style.Value.Replace(");", "").Replace("background-image:url(", "").Replace("/s/", "/l/");
            return "http://mobygames.com" + path;
        }
Ejemplo n.º 2
0
        /// <summary>
        /// Scrapes one game from mobygames. When the master record has no mobygames title the
        /// gamesdb title and platform are written to <paramref name="o"/> and no page is downloaded;
        /// otherwise the work is handed to PullWebpageData.
        /// </summary>
        /// <param name="o">Scrape result to populate and return.</param>
        /// <param name="order">Whether mobygames is the primary scraper; any non-primary value is handled as secondary.</param>
        /// <param name="controller">Progress dialog used for the status message and cancellation.</param>
        /// <param name="masterrecord">Master record holding the mobygames and gamesdb identifiers.</param>
        /// <param name="message">Text that the status message is built around.</param>
        /// <returns>The populated scrape result.</returns>
        public static ScrapedGameObjectWeb ScrapeGame(ScrapedGameObjectWeb o, ScraperOrder order, ProgressDialogController controller, ScraperMaster masterrecord, string message)
        {
            bool isPrimary = order == ScraperOrder.Primary;

            // status text: "<mode heading>\n<incoming message>Downloading information for: <title>\n(<platform>)"
            string heading = isPrimary ? "Primary Scraping (mobygames)\n" : "Secondary Scraping (mobygames)\n";
            message = heading + message + "Downloading information for: " + masterrecord.MOBYTitle + "\n(" + masterrecord.MOBYPlatformAlias + ")";

            // the returned settings are not read in this method; the call itself is retained
            GlobalSettings.GetGlobals();

            if (masterrecord.MOBYTitle == null)
            {
                // no moby data matched - use gamesdb title and platform and return
                o.Data.Title    = masterrecord.GDBTitle;
                o.Data.Platform = masterrecord.GDBPlatformName;
                return o;
            }

            if (isPrimary)
            {
                // moby data has been matched and moby is the primary source
                controller.SetMessage(message);
                o.Data.Title    = masterrecord.MOBYTitle;
                o.Data.Platform = masterrecord.MOBYPlatformAlias;
            }

            ScraperOrder effectiveOrder = isPrimary ? ScraperOrder.Primary : ScraperOrder.Secondary;
            o = PullWebpageData(o, masterrecord, controller, effectiveOrder, message);

            if (controller.IsCanceled == true)
            {
                controller.CloseAsync();
            }

            return o;
        }
Ejemplo n.º 3
0
        /// <summary>
        /// Builds the audit view: enumerates the numerically named sub-folders of the
        /// "Data\Games" directory under the application base directory, matches each folder
        /// number against the scraper master list, records whether any game references that id,
        /// and binds the resulting list (sorted by folder number) to the grid.
        /// </summary>
        public ScrapedDataAudit()
        {
            InitializeComponent();

            Data = new List<FolderData>();

            Master = ScraperMaster.GetMasterList();

            // enumerate game data folder
            BaseFolder = AppDomain.CurrentDomain.BaseDirectory + "\\Data\\Games";

            // a missing data folder just means there is nothing to audit
            string[] fol = Directory.Exists(BaseFolder)
                ? Directory.GetDirectories(BaseFolder)
                : new string[0];

            // fetch the game list once instead of once per folder
            var games = Game.GetGames().ToList();

            foreach (var d in fol)
            {
                string fName = System.IO.Path.GetFileName(d);

                // skip folders whose name is not a valid number
                int n;
                if (!int.TryParse(fName, out n))
                {
                    continue;
                }

                // find the master record whose gid equals the folder number
                var g = Master.FirstOrDefault(a => a.gid == n);

                // no master record - nothing to report for this folder
                if (g == null)
                {
                    continue;
                }

                // build folderdata object
                FolderData fd = new FolderData();
                fd.FolderName = n;
                fd.GameName   = g.GDBTitle;
                fd.System     = g.GDBPlatformName; //GSystem.GetSystemName(Convert.ToInt32(g.MedLaunchSystemId));

                // linked when at least one game references this id
                fd.IsLinked = games.Any(a => a.gdbId == n);

                // add to list
                Data.Add(fd);
            }

            // OrderBy returns a new sequence and leaves its source untouched, so keep the result
            Data = Data.OrderBy(a => a.FolderName).ToList();

            // populate the datagrid
            dgAudit.ItemsSource = Data;
        }
Ejemplo n.º 4
0
        /// <summary>
        /// Scrapes one game from thegamesdb.net into <paramref name="o"/>.
        /// As the primary scraper the text fields are overwritten with the downloaded values;
        /// otherwise only fields that are still null, and image lists that are still empty, are
        /// filled (screenshots are always appended) and duplicates are then removed from the lists.
        /// </summary>
        /// <param name="o">Scrape result to populate and return; its GdbId selects the game to download.</param>
        /// <param name="order">Whether thegamesdb is the primary scraper; any non-primary value is handled as secondary.</param>
        /// <param name="controller">Progress dialog that receives the status message.</param>
        /// <param name="masterrecord">Master record supplying the gamesdb title and platform name.</param>
        /// <param name="message">Text that the status message is built around.</param>
        /// <returns>The populated scrape result; returned unchanged beyond title/platform when no game data comes back.</returns>
        public static ScrapedGameObjectWeb ScrapeGame(ScrapedGameObjectWeb o, ScraperOrder order, ProgressDialogController controller, ScraperMaster masterrecord, string message)
        {
            const string BaseImgUrl = "http://thegamesdb.net/banners/";
            bool isPrimary = order == ScraperOrder.Primary;

            message = message + "Downloading information for: " + masterrecord.TGDBData.GamesDBTitle + "\n(" + masterrecord.TGDBData.GamesDBPlatformName + ")";
            message = (isPrimary ? "Primary Scraping (thegamesdb)\n" : "Secondary Scraping (thegamesdb)\n") + message;

            GlobalSettings gs = GlobalSettings.GetGlobals();

            controller.SetMessage(message);

            if (isPrimary)
            {
                o.Data.Title    = masterrecord.TGDBData.GamesDBTitle;
                o.Data.Platform = masterrecord.TGDBData.GamesDBPlatformName;
            }
            else
            {
                // GDB is secondary scraper - keep whatever the primary scraper already stored
                if (o.Data.Title == null)
                {
                    o.Data.Title = masterrecord.TGDBData.GamesDBTitle;
                }
                if (o.Data.Platform == null)
                {
                    o.Data.Platform = masterrecord.TGDBData.GamesDBPlatformName;
                }
            }

            // get the text data from thegamesdb.net
            GDBNETGame g = GDBNETGamesDB.GetGame(o.GdbId);
            if (g == null)
            {
                // Nothing was returned
                return o;
            }

            if (isPrimary)
            {
                /* Primary Scraping */
                o.Data.AlternateTitles = g.AlternateTitles;
                o.Data.Coop            = g.Coop;
                o.Data.Developer       = g.Developer;
                o.Data.ESRB            = g.ESRB;
                o.Data.Genres          = g.Genres;
                o.Data.Overview        = g.Overview;
                o.Data.Players         = g.Players;
                o.Data.Publisher       = g.Publisher;
                o.Data.Released        = g.ReleaseDate;

                if (gs.scrapeBoxart == true)
                {
                    if (g.Images.BoxartBack != null)
                    {
                        o.BackCovers.Add(BaseImgUrl + g.Images.BoxartBack.Path);
                    }
                    if (g.Images.BoxartFront != null)
                    {
                        o.FrontCovers.Add(BaseImgUrl + g.Images.BoxartFront.Path);
                    }
                }
                // the image collections are null-checked here just as on the secondary path
                if (gs.scrapeBanners == true && g.Images.Banners != null)
                {
                    foreach (var s in g.Images.Banners)
                    {
                        o.Banners.Add(BaseImgUrl + s.Path);
                    }
                }
                if (gs.scrapeFanart == true && g.Images.Fanart != null)
                {
                    int co = 0;
                    foreach (var s in g.Images.Fanart)
                    {
                        if (co >= gs.maxFanarts)
                        {
                            break;
                        }
                        o.FanArts.Add(BaseImgUrl + s.Path);
                        co++;
                    }
                }
                if (gs.scrapeScreenshots == true && g.Images.Screenshots != null)
                {
                    int co = 0;
                    foreach (var s in g.Images.Screenshots)
                    {
                        if (co >= gs.maxScreenshots)
                        {
                            break;
                        }
                        o.Screenshots.Add(BaseImgUrl + s.Path);
                        co++;
                    }
                }

                return o;
            }

            /* secondary scraping */
            if (o.Data.AlternateTitles == null && g.AlternateTitles != null && g.AlternateTitles.Count > 0)
            {
                o.Data.AlternateTitles = new List<string>();
                o.Data.AlternateTitles.AddRange(g.AlternateTitles);
            }

            if (o.Data.Coop == null && g.Coop != null)
            {
                o.Data.Coop = g.Coop;
            }
            if (o.Data.Developer == null && g.Developer != null)
            {
                o.Data.Developer = g.Developer;
            }
            if (o.Data.ESRB == null && g.ESRB != null)
            {
                o.Data.ESRB = g.ESRB;
            }
            if (o.Data.Genres == null && g.Genres != null)
            {
                o.Data.Genres = new List<string>();
                o.Data.Genres.AddRange(g.Genres);
            }

            if (o.Data.Overview == null)
            {
                o.Data.Overview = g.Overview;
            }
            if (o.Data.Players == null)
            {
                o.Data.Players = g.Players;
            }
            if (o.Data.Publisher == null)
            {
                o.Data.Publisher = g.Publisher;
            }
            if (o.Data.Released == null)
            {
                o.Data.Released = g.ReleaseDate;
            }

            if (gs.scrapeBoxart == true)
            {
                if (o.BackCovers.Count == 0 && g.Images.BoxartBack != null)
                {
                    o.BackCovers.Add(BaseImgUrl + g.Images.BoxartBack.Path);
                }
                if (o.FrontCovers.Count == 0 && g.Images.BoxartFront != null)
                {
                    o.FrontCovers.Add(BaseImgUrl + g.Images.BoxartFront.Path);
                }
            }
            if (gs.scrapeBanners == true && o.Banners.Count == 0 && g.Images.Banners != null)
            {
                foreach (var s in g.Images.Banners)
                {
                    o.Banners.Add(BaseImgUrl + s.Path);
                }
            }
            if (gs.scrapeFanart == true && o.FanArts.Count == 0 && g.Images.Fanart != null)
            {
                int co = 0;
                foreach (var s in g.Images.Fanart)
                {
                    if (co >= gs.maxFanarts)
                    {
                        break;
                    }
                    o.FanArts.Add(BaseImgUrl + s.Path);
                    co++;
                }
            }
            if (gs.scrapeScreenshots == true && g.Images.Screenshots != null)
            {
                // screenshots are appended even when some are already present
                int co = 0;
                foreach (var s in g.Images.Screenshots)
                {
                    if (co >= gs.maxScreenshots)
                    {
                        break;
                    }
                    o.Screenshots.Add(BaseImgUrl + s.Path);
                    co++;
                }
            }

            // remove duplicates - Enumerable.Distinct() only returns a new sequence, so calling it
            // and discarding the result removes nothing; the lists are rewritten in place instead
            //RemoveDuplicateEntries(o.Data.AlternateTitles);
            RemoveDuplicateEntries(o.Data.Genres);
            RemoveDuplicateEntries(o.Screenshots);
            RemoveDuplicateEntries(o.FanArts);
            RemoveDuplicateEntries(o.Banners);
            RemoveDuplicateEntries(o.BackCovers);
            RemoveDuplicateEntries(o.FrontCovers);

            return o;
        }

        /// <summary>
        /// Removes repeated strings from <paramref name="items"/> in place, keeping the first
        /// occurrence of each value in its original position. A null collection is ignored.
        /// </summary>
        private static void RemoveDuplicateEntries(ICollection<string> items)
        {
            if (items == null || items.Count < 2)
            {
                return;
            }

            HashSet<string> seen   = new HashSet<string>();
            List<string>    unique = new List<string>(items.Count);
            foreach (string item in items)
            {
                if (seen.Add(item))
                {
                    unique.Add(item);
                }
            }

            if (unique.Count == items.Count)
            {
                // nothing repeated - leave the collection untouched
                return;
            }

            items.Clear();
            foreach (string item in unique)
            {
                items.Add(item);
            }
        }
Ejemplo n.º 5
0
        /// <summary>
        /// Searches the locally held game list of one system for the best match to a game name.
        /// The candidates are narrowed in stages: highest matched-word count, then word-by-word
        /// matching, then (optionally) collapsing two identically titled results, and finally a
        /// Levenshtein-based pass over whatever candidates remain.
        /// </summary>
        /// <param name="gameName">Name to search for. Names containing "[PD]", "(PD)", "SC-3000" or "BIOS" are not searched.</param>
        /// <param name="systemId">System whose games are searched; id 18 is looked up as id 7.</param>
        /// <param name="gameId">Not used by this method.</param>
        /// <returns>The matching records; an empty collection when the name is ignored.</returns>
        public ICollection<ScraperMaster> SearchGameLocal(string gameName, int systemId, int gameId)
        {
            SearchString            = gameName;
            LocalIterationCount     = 0;
            WorkingSearchCollection = new List<ScraperMaster>();
            SearchCollection        = new List<ScraperMaster>();

            if (SearchString.Contains("[PD]") || SearchString.Contains("(PD)") || SearchString.Contains("SC-3000") || SearchString.Contains("BIOS"))
            {
                // ignore public domain games
                return SearchCollection;
            }

            // convert pce-cd systemid
            if (systemId == 18)
            {
                systemId = 7;
            }

            // get a list with all games for this system
            SystemCollection = PlatformGames.Where(a => a.MedLaunchSystemId == systemId).ToList();

            // every stage below works on the same lower-cased, symbol-stripped name
            string normalizedName = StripSymbols(gameName.ToLower());

            // Match all words and return a list ordered by highest matches
            List<SearchOrdering> searchResult = OrderByMatchedWords(normalizedName);

            // record with the highest match count (null when there are no results)
            var maxValueRecord = searchResult.OrderByDescending(v => v.Matches).FirstOrDefault();

            if (maxValueRecord == null)
            {
                SearchCollection = searchResult.Select(a => a.Game).ToList();
            }
            else
            {
                int maxValue = maxValueRecord.Matches;

                // keep only the records that share the highest match count
                List<ScraperMaster> topGames = searchResult.Where(a => a.Matches == maxValue)
                                                           .Select(a => a.Game)
                                                           .ToList();
                SearchCollection = topGames;

                if (topGames.Count <= 1)
                {
                    // One entry is a unique best match. Zero is not expected, because the record
                    // that supplied maxValue is itself in searchResult; should it happen anyway,
                    // an empty list is returned rather than null.
                    return new List<ScraperMaster>(topGames);
                }
            }

            // Multiple records returned - continue

            // match order of words starting with the first and incrementing
            List<ScraperMaster> m = MatchOneWordAtATime(SearchCollection, normalizedName);

            if (m.Count == 1)
            {
                return m;
            }
            if (m.Count > 1)
            {
                SearchCollection = m;
            }

            if (SearchCollection.Count == 2 && _GlobalSettings.preferGenesis == true)
            {
                // 2 records returned - check whether their titles match exactly
                ScraperMaster firstGame = SearchCollection.First();
                ScraperMaster lastGame  = SearchCollection.Last();

                if (firstGame.TGDBData.GamesDBTitle == lastGame.TGDBData.GamesDBTitle)
                {
                    // looks like the same game - perhaps different systems on the games db (ie - Megadrive / Genesis) - return the first result
                    return new List<ScraperMaster> { firstGame };
                }
            }

            // still no definite match found
            // run levenshtein fuzzy search on SearchCollection
            return LevenIteration(SearchCollection, normalizedName);
        }