/// <summary>
/// Master handler for parsing data from the mobygames website.
/// Downloads the main game page and, depending on the global settings, the
/// cover-art and screenshots pages, then copies the scraped values into <paramref name="o"/>.
/// </summary>
/// <param name="o">Scrape result that is filled in and returned.</param>
/// <param name="masterrecord">Supplies the mobygames platform alias and game alias used to build the page URLs.</param>
/// <param name="controller">Progress dialog; polled for cancellation inside the parsing loops.</param>
/// <param name="order">
/// <c>ScraperOrder.Primary</c> overwrites existing text fields; any other value only fills
/// fields that are still empty.
/// </param>
/// <param name="message">Not used by this method.</param>
/// <returns>
/// The same <paramref name="o"/> instance. It is returned unchanged when the main page could not
/// be downloaded, and partially filled when the user cancels.
/// </returns>
public static ScrapedGameObjectWeb PullWebpageData(ScrapedGameObjectWeb o, ScraperMaster masterrecord, ProgressDialogController controller, ScraperOrder order, string message)
{
    const string baseurl = "http://www.mobygames.com/game/";
    string param = masterrecord.MOBYPlatformAlias + "/" + masterrecord.MOBYAlias;

    // query the main game page
    string initialPage = ReturnWebpage(baseurl, param, 10000);
    if (IsMobyErrorPage(initialPage))
    {
        return o;
    }

    GlobalSettings gs = GlobalSettings.GetGlobals();
    bool isPrimary = order == ScraperOrder.Primary;

    // convert page string to htmldoc
    HtmlDocument doc = new HtmlDocument();
    doc.LoadHtml(initialPage);

    // get core information: a div whose text equals its markup is a plain heading,
    // and the div that follows it carries the value
    List<HtmlNode> divs = SelectNodesOrEmpty(doc.DocumentNode, "//div");
    for (int i = 0; i < divs.Count; i++)
    {
        if (controller.IsCanceled == true)
        {
            controller.CloseAsync();
            return o;
        }

        string heading = divs[i].InnerHtml;
        if (divs[i].InnerText != heading || i + 1 >= divs.Count)
        {
            // not a heading, or a heading with no value div after it
            continue;
        }

        switch (heading)
        {
            case "Published by":
                if (isPrimary || o.Data.Publisher == null)
                {
                    o.Data.Publisher = WebUtility.HtmlDecode(divs[i + 1].InnerText);
                }
                break;

            case "Developed by":
                if (isPrimary || o.Data.Developer == null)
                {
                    o.Data.Developer = WebUtility.HtmlDecode(divs[i + 1].InnerText);
                }
                break;

            case "Released":
                if (isPrimary || o.Data.Released == null)
                {
                    o.Data.Released = WebUtility.HtmlDecode(divs[i + 1].InnerText);
                }
                break;

            case "ESRB Rating":
                if (isPrimary || o.Data.ESRB == null)
                {
                    o.Data.ESRB = WebUtility.HtmlDecode(divs[i + 1].InnerText);
                }
                break;

            case "Genre":
                if (o.Data.Genres == null)
                {
                    o.Data.Genres = new List<string>();
                }

                // Decide once, before adding anything: re-checking "Count == 0" per genre
                // would stop a secondary scrape after the first genre was stored.
                if (isPrimary || o.Data.Genres.Count == 0)
                {
                    string genres = WebUtility.HtmlDecode(divs[i + 1].InnerText);
                    foreach (string genre in genres.Split('/'))
                    {
                        string trimmed = genre.Trim();
                        if (trimmed.Length > 0)
                        {
                            o.Data.Genres.Add(trimmed);
                        }
                    }
                }
                break;
        }
    }

    // get the game description: everything between the heading and the next "<div class=", tags stripped
    if (initialPage.Contains("<h2>Description</h2>"))
    {
        if (isPrimary || o.Data.Overview == null)
        {
            string afterHeading = initialPage.Split(new string[] { "<h2>Description</h2>" }, StringSplitOptions.None)[1];
            string rawDescription = afterHeading.Split(new string[] { "<div class=" }, StringSplitOptions.None)[0];
            o.Data.Overview = WebUtility.HtmlDecode(Regex.Replace(rawDescription, @"<[^>]*>", String.Empty));
        }
    }

    // get alternate titles: the <li> entries of the first list after the heading
    if (initialPage.Contains("<h2>Alternate Titles</h2>"))
    {
        if (isPrimary || o.Data.AlternateTitles == null)
        {
            string afterHeading = initialPage.Split(new string[] { "<h2>Alternate Titles</h2>" }, StringSplitOptions.None)[1];
            string flattened = afterHeading.Replace("\n\r", "").Replace("\r\n", "").Replace("\r", "").Replace("\n", "").Replace("<ul>", "");
            string listBody = flattened.Split(new string[] { "</ul>" }, StringSplitOptions.None)[0].Trim();
            string[] titles = listBody.Replace("<li>", "").Replace("</li>", "\n").Split('\n');

            if (o.Data.AlternateTitles == null)
            {
                o.Data.AlternateTitles = new List<string>();
            }

            foreach (string title in titles)
            {
                if (title != "")
                {
                    o.Data.AlternateTitles.Add(WebUtility.HtmlDecode(title.Replace(" -- <em>Japanese spelling</em>", "")));
                }
            }
        }
    }

    if (gs.scrapeBoxart == true || gs.scrapeMedia == true)
    {
        // cover art - query the coverart page
        string coverPage = ReturnWebpage(baseurl, param + "/cover-art", 10000);

        if (!IsMobyErrorPage(coverPage) && !coverPage.Contains("There are no covers for the selected platform."))
        {
            HtmlDocument cDoc = new HtmlDocument();
            cDoc.LoadHtml(coverPage);

            // An XPath expression starting with "//" is evaluated from the document root
            // regardless of the node it is called on, so captions and links are collected
            // once for the whole page and paired up by position.
            List<HtmlNode> captions = SelectNodesOrEmpty(cDoc.DocumentNode, "//div[@class ='thumbnail-cover-caption']");
            List<HtmlNode> links = SelectNodesOrEmpty(cDoc.DocumentNode, "//a[@class ='thumbnail-cover']");
            int pairCount = Math.Min(captions.Count, links.Count);

            bool frontFound = false;
            bool backFound = false;
            bool mediaFound = false;

            for (int i = 0; i < pairCount; i++)
            {
                if (controller.IsCanceled == true)
                {
                    controller.CloseAsync();
                    return o;
                }

                string caption = captions[i].InnerText.Trim();
                string mediaUrl = BuildMobyImageUrl(links[i]);
                if (mediaUrl == null)
                {
                    // link without a background-image style - nothing to download
                    continue;
                }

                // Only the first image of each kind is considered; an existing entry is never replaced.
                if (!frontFound && string.Equals(caption, "front cover", StringComparison.OrdinalIgnoreCase))
                {
                    if ((o.FrontCovers == null || o.FrontCovers.Count == 0) && gs.scrapeBoxart == true)
                    {
                        o.FrontCovers = new List<string>();
                        o.FrontCovers.Add(mediaUrl);
                    }
                    frontFound = true;
                }

                if (!backFound && string.Equals(caption, "back cover", StringComparison.OrdinalIgnoreCase))
                {
                    if ((o.BackCovers == null || o.BackCovers.Count == 0) && gs.scrapeBoxart == true)
                    {
                        o.BackCovers = new List<string>();
                        o.BackCovers.Add(mediaUrl);
                    }
                    backFound = true;
                }

                if (!mediaFound && string.Equals(caption, "media", StringComparison.OrdinalIgnoreCase))
                {
                    if ((o.Medias == null || o.Medias.Count == 0) && gs.scrapeMedia == true)
                    {
                        o.Medias = new List<string>();
                        o.Medias.Add(mediaUrl);
                    }
                    mediaFound = true;
                }

                if (mediaFound && backFound && frontFound)
                {
                    break;
                }
            }
        }
    }

    if (gs.scrapeScreenshots == true)
    {
        // screenshots - query the screenshots page
        string screenPage = ReturnWebpage(baseurl, param + "/screenshots", 10000);

        if (!IsMobyErrorPage(screenPage) && !screenPage.Contains("There are no user screenshots on file"))
        {
            HtmlDocument sDoc = new HtmlDocument();
            sDoc.LoadHtml(screenPage);

            List<HtmlNode> screens = SelectNodesOrEmpty(sDoc.DocumentNode, "//a[@class ='thumbnail-image']");
            if (o.Screenshots == null)
            {
                o.Screenshots = new List<string>();
            }

            int added = 0;
            foreach (HtmlNode screen in screens)
            {
                if (added >= gs.maxScreenshots)
                {
                    break;
                }

                string screenshotUrl = BuildMobyImageUrl(screen);
                if (screenshotUrl == null)
                {
                    continue;
                }

                o.Screenshots.Add(screenshotUrl);
                added++;
            }
        }
    }

    return o;
}

/// <summary>
/// True when a downloaded page is missing or is one of the error markers this scraper checks for.
/// </summary>
private static bool IsMobyErrorPage(string page)
{
    return page == null || page == "ERROR - Server Error" || page == "ERROR - Not Found";
}

/// <summary>
/// Runs an XPath query and always returns a list; SelectNodes yields null rather than an
/// empty collection when nothing matches.
/// </summary>
private static List<HtmlNode> SelectNodesOrEmpty(HtmlNode root, string xpath)
{
    var found = root.SelectNodes(xpath);
    return found == null ? new List<HtmlNode>() : found.ToList();
}

/// <summary>
/// Turns the "background-image:url(...);" style of a thumbnail link into the absolute URL of
/// the large ("/l/") image, or returns null when the node has no usable style attribute.
/// </summary>
private static string BuildMobyImageUrl(HtmlNode thumbnail)
{
    var style = thumbnail.Attributes["style"];
    if (style == null || string.IsNullOrEmpty(style.Value))
    {
        return null;
    }

    string path = style.Value.Replace(");", "").Replace("background-image:url(", "").Replace("/s/", "/l/");
    return "http://mobygames.com" + path;
}
/// <summary>
/// Scrapes a single game from mobygames, acting as either the primary or the secondary scraper.
/// </summary>
/// <param name="o">Scrape result that is filled in and returned.</param>
/// <param name="order">
/// <c>ScraperOrder.Primary</c> makes mobygames authoritative (title and platform are taken from
/// the moby match); any other value is treated as secondary scraping.
/// </param>
/// <param name="controller">Progress dialog that receives the status message and is polled for cancellation.</param>
/// <param name="masterrecord">Master record holding the mobygames match and the thegamesdb fallback title/platform.</param>
/// <param name="message">Text placed between the scraper heading and the "Downloading information for" line.</param>
/// <returns>
/// The same <paramref name="o"/> instance. When the master record has no mobygames title, only
/// the thegamesdb title and platform are copied and no page is downloaded.
/// </returns>
public static ScrapedGameObjectWeb ScrapeGame(ScrapedGameObjectWeb o, ScraperOrder order, ProgressDialogController controller, ScraperMaster masterrecord, string message)
{
    bool isPrimary = order == ScraperOrder.Primary;

    if (masterrecord.MOBYTitle == null)
    {
        // no moby data matched - use gamesdb title and platform and return
        o.Data.Title = masterrecord.GDBTitle;
        o.Data.Platform = masterrecord.GDBPlatformName;
        return o;
    }

    string heading = isPrimary ? "Primary Scraping (mobygames)\n" : "Secondary Scraping (mobygames)\n";
    message = heading + message + "Downloading information for: " + masterrecord.MOBYTitle + "\n(" + masterrecord.MOBYPlatformAlias + ")";

    // Show the status for both scraping orders, matching the thegamesdb scraper.
    controller.SetMessage(message);

    if (isPrimary)
    {
        // moby data has been matched and is authoritative
        o.Data.Title = masterrecord.MOBYTitle;
        o.Data.Platform = masterrecord.MOBYPlatformAlias;
    }

    o = PullWebpageData(o, masterrecord, controller, isPrimary ? ScraperOrder.Primary : ScraperOrder.Secondary, message);

    if (controller.IsCanceled == true)
    {
        controller.CloseAsync();
    }

    return o;
}
/// <summary>
/// Builds the audit view: enumerates the numerically named sub-folders of "Data\Games" under the
/// application base directory, resolves each one against the scraper master list, flags whether
/// any game in the database links to it, and binds the rows (sorted by folder number) to the grid.
/// </summary>
public ScrapedDataAudit()
{
    InitializeComponent();

    Data = new List<FolderData>();
    Master = ScraperMaster.GetMasterList();

    // enumerate game data folder
    BaseFolder = AppDomain.CurrentDomain.BaseDirectory + "\\Data\\Games";

    // Nothing has been scraped yet if the folder is missing; show an empty grid instead of throwing.
    if (Directory.Exists(BaseFolder))
    {
        // Load the games once instead of once per folder.
        var games = Game.GetGames().ToList();

        foreach (string directory in Directory.GetDirectories(BaseFolder))
        {
            // check whether folder name is a valid number
            int n;
            if (!int.TryParse(System.IO.Path.GetFileName(directory), out n))
            {
                continue;
            }

            // find gdbid from Master; folders without a master record are skipped
            var master = Master.FirstOrDefault(a => a.gid == n);
            if (master == null)
            {
                continue;
            }

            // build folderdata object
            FolderData fd = new FolderData();
            fd.FolderName = n;
            fd.GameName = master.GDBTitle;
            fd.System = master.GDBPlatformName;

            // now check whether it is linked anywhere in the database
            fd.IsLinked = games.Any(a => a.gdbId == n);

            Data.Add(fd);
        }

        // OrderBy returns a new sequence, so the sorted result has to be stored.
        Data = Data.OrderBy(a => a.FolderName).ToList();
    }

    // populate the datagrid
    dgAudit.ItemsSource = Data;
}
/// <summary>
/// Scrapes a single game from thegamesdb.net, acting as either the primary or the secondary scraper.
/// </summary>
/// <param name="o">Scrape result that is filled in and returned; its <c>GdbId</c> selects the game to fetch.</param>
/// <param name="order">
/// <c>ScraperOrder.Primary</c> overwrites the text fields and appends images unconditionally;
/// any other value only fills text fields that are still null and image lists that are still empty
/// (screenshots are appended in both modes).
/// </param>
/// <param name="controller">Progress dialog that receives the status message.</param>
/// <param name="masterrecord">Master record supplying the thegamesdb title and platform name.</param>
/// <param name="message">Text placed between the scraper heading and the "Downloading information for" line.</param>
/// <returns>The same <paramref name="o"/> instance; only title/platform are touched when no game data is returned.</returns>
public static ScrapedGameObjectWeb ScrapeGame(ScrapedGameObjectWeb o, ScraperOrder order, ProgressDialogController controller, ScraperMaster masterrecord, string message)
{
    const string baseImgUrl = "http://thegamesdb.net/banners/";
    bool isPrimary = order == ScraperOrder.Primary;

    string heading = isPrimary ? "Primary Scraping (thegamesdb)\n" : "Secondary Scraping (thegamesdb)\n";
    message = heading + message + "Downloading information for: " + masterrecord.TGDBData.GamesDBTitle + "\n(" + masterrecord.TGDBData.GamesDBPlatformName + ")";
    controller.SetMessage(message);

    GlobalSettings gs = GlobalSettings.GetGlobals();

    if (isPrimary)
    {
        o.Data.Title = masterrecord.TGDBData.GamesDBTitle;
        o.Data.Platform = masterrecord.TGDBData.GamesDBPlatformName;
    }
    else
    {
        // GDB is secondary scraper - keep whatever the primary scraper already found
        if (o.Data.Title == null)
        {
            o.Data.Title = masterrecord.TGDBData.GamesDBTitle;
        }
        if (o.Data.Platform == null)
        {
            o.Data.Platform = masterrecord.TGDBData.GamesDBPlatformName;
        }
    }

    // get the text data from thegamesdb.net
    GDBNETGame g = GDBNETGamesDB.GetGame(o.GdbId);
    if (g == null)
    {
        // Nothing was returned
        return o;
    }

    if (isPrimary)
    {
        o.Data.AlternateTitles = g.AlternateTitles;
        o.Data.Coop = g.Coop;
        o.Data.Developer = g.Developer;
        o.Data.ESRB = g.ESRB;
        o.Data.Genres = g.Genres;
        o.Data.Overview = g.Overview;
        o.Data.Players = g.Players;
        o.Data.Publisher = g.Publisher;
        o.Data.Released = g.ReleaseDate;
    }
    else
    {
        if (o.Data.AlternateTitles == null && g.AlternateTitles != null && g.AlternateTitles.Count > 0)
        {
            o.Data.AlternateTitles = new List<string>(g.AlternateTitles);
        }
        if (o.Data.Coop == null && g.Coop != null)
        {
            o.Data.Coop = g.Coop;
        }
        if (o.Data.Developer == null && g.Developer != null)
        {
            o.Data.Developer = g.Developer;
        }
        if (o.Data.ESRB == null && g.ESRB != null)
        {
            o.Data.ESRB = g.ESRB;
        }
        if (o.Data.Genres == null && g.Genres != null)
        {
            o.Data.Genres = new List<string>(g.Genres);
        }
        if (o.Data.Overview == null)
        {
            o.Data.Overview = g.Overview;
        }
        if (o.Data.Players == null)
        {
            o.Data.Players = g.Players;
        }
        if (o.Data.Publisher == null)
        {
            o.Data.Publisher = g.Publisher;
        }
        if (o.Data.Released == null)
        {
            o.Data.Released = g.ReleaseDate;
        }
    }

    var images = g.Images;
    if (images != null)
    {
        if (gs.scrapeBoxart == true)
        {
            if (o.BackCovers == null)
            {
                o.BackCovers = new List<string>();
            }
            if (o.FrontCovers == null)
            {
                o.FrontCovers = new List<string>();
            }

            if (images.BoxartBack != null && (isPrimary || o.BackCovers.Count == 0))
            {
                o.BackCovers.Add(baseImgUrl + images.BoxartBack.Path);
            }
            if (images.BoxartFront != null && (isPrimary || o.FrontCovers.Count == 0))
            {
                o.FrontCovers.Add(baseImgUrl + images.BoxartFront.Path);
            }
        }

        if (gs.scrapeBanners == true && images.Banners != null)
        {
            if (o.Banners == null)
            {
                o.Banners = new List<string>();
            }

            if (isPrimary || o.Banners.Count == 0)
            {
                foreach (var banner in images.Banners)
                {
                    o.Banners.Add(baseImgUrl + banner.Path);
                }
            }
        }

        if (gs.scrapeFanart == true && images.Fanart != null)
        {
            if (o.FanArts == null)
            {
                o.FanArts = new List<string>();
            }

            if (isPrimary || o.FanArts.Count == 0)
            {
                int added = 0;
                foreach (var fanart in images.Fanart)
                {
                    if (added >= gs.maxFanarts)
                    {
                        break;
                    }
                    o.FanArts.Add(baseImgUrl + fanart.Path);
                    added++;
                }
            }
        }

        // Screenshots are appended in both modes, even when some already exist.
        if (gs.scrapeScreenshots == true && images.Screenshots != null)
        {
            if (o.Screenshots == null)
            {
                o.Screenshots = new List<string>();
            }

            int added = 0;
            foreach (var screenshot in images.Screenshots)
            {
                if (added >= gs.maxScreenshots)
                {
                    break;
                }
                o.Screenshots.Add(baseImgUrl + screenshot.Path);
                added++;
            }
        }
    }

    if (!isPrimary)
    {
        // remove duplicates left behind by combining two scrapers
        RemoveDuplicateEntries(o.Data.Genres);
        RemoveDuplicateEntries(o.Screenshots);
        RemoveDuplicateEntries(o.FanArts);
        RemoveDuplicateEntries(o.Banners);
        RemoveDuplicateEntries(o.BackCovers);
        RemoveDuplicateEntries(o.FrontCovers);
    }

    return o;
}

/// <summary>
/// Removes repeated strings from a collection in place, keeping the first occurrence of each.
/// A null collection is ignored.
/// </summary>
private static void RemoveDuplicateEntries(ICollection<string> items)
{
    if (items == null || items.Count < 2)
    {
        return;
    }

    // Distinct() only returns a new sequence, so the collection has to be rebuilt from it.
    List<string> unique = items.Distinct().ToList();
    if (unique.Count == items.Count)
    {
        return;
    }

    items.Clear();
    foreach (string item in unique)
    {
        items.Add(item);
    }
}
/// <summary>
/// Looks for master-list entries matching a game name on one system, narrowing the candidates in
/// stages: best word-match count, then word-by-word matching, then (optionally) collapsing two
/// identically titled entries, and finally a Levenshtein pass over whatever is left.
/// </summary>
/// <param name="gameName">Name to search for. Names containing "[PD]", "(PD)", "SC-3000" or "BIOS" are not searched.</param>
/// <param name="systemId">MedLaunch system id; 18 is searched as 7.</param>
/// <param name="gameId">Not used by this method.</param>
/// <returns>
/// The matching entries: a single-element list when a stage produced a definite match, otherwise
/// the result of the Levenshtein pass. An empty list is returned for ignored or null names.
/// </returns>
public ICollection<ScraperMaster> SearchGameLocal(string gameName, int systemId, int gameId)
{
    SearchString = gameName;
    LocalIterationCount = 0;
    WorkingSearchCollection = new List<ScraperMaster>();
    SearchCollection = new List<ScraperMaster>();

    if (gameName == null)
    {
        // nothing to search for
        return SearchCollection;
    }

    if (SearchString.Contains("[PD]") || SearchString.Contains("(PD)") || SearchString.Contains("SC-3000") || SearchString.Contains("BIOS"))
    {
        // ignore public domain games
        return SearchCollection;
    }

    // convert pce-cd systemid
    if (systemId == 18)
    {
        systemId = 7;
    }

    // get a list with all games for this system
    SystemCollection = PlatformGames.Where(a => a.MedLaunchSystemId == systemId).ToList();

    // the normalised name is used by every matching stage below
    string cleanedName = StripSymbols(gameName.ToLower());

    // Match all words and return a list ordered by highest matches
    List<SearchOrdering> searchResult = OrderByMatchedWords(cleanedName);

    // get the record with the highest match count
    var best = searchResult.OrderByDescending(v => v.Matches).FirstOrDefault();
    if (best == null)
    {
        SearchCollection = searchResult.Select(a => a.Game).ToList();
    }
    else
    {
        // keep every record that shares the highest match count (always at least the best record itself)
        int maxValue = best.Matches;
        List<ScraperMaster> topGames = searchResult.Where(a => a.Matches == maxValue).Select(a => a.Game).ToList();
        SearchCollection = topGames;

        if (topGames.Count == 1)
        {
            // single entry returned
            return new List<ScraperMaster>(topGames);
        }
    }

    // Multiple records returned - continue
    // match order of words starting with the first and incrementing
    List<ScraperMaster> m = MatchOneWordAtATime(SearchCollection, cleanedName);
    if (m.Count == 1)
    {
        return m;
    }
    if (m.Count > 1)
    {
        SearchCollection = m;
    }

    if (SearchCollection.Count == 2 && _GlobalSettings.preferGenesis == true)
    {
        // 2 records returned - check whether they match exactly
        string first = SearchCollection.First().TGDBData.GamesDBTitle;
        string last = SearchCollection.Last().TGDBData.GamesDBTitle;
        if (first == last)
        {
            // looks like the same game - perhaps different systems on the games db
            // (ie - Megadrive / Genesis) - return the first result
            return new List<ScraperMaster> { SearchCollection.First() };
        }
    }

    // still no definite match found
    // run levenshtein fuzzy search on SearchCollection
    return LevenIteration(SearchCollection, cleanedName);
}