/// <summary>
/// Entry point for scraping a single game from mobygames.
/// When the master record has no mobygames title, the title and platform are taken
/// from the thegamesdb data and no web request is made.
/// </summary>
/// <param name="o">The scraped-game object to populate; the same instance is returned.</param>
/// <param name="order">Whether mobygames acts as the primary or the secondary scraper.</param>
/// <param name="controller">Progress dialog that receives the status message.</param>
/// <param name="masterrecord">Master record holding the matched mobygames / thegamesdb data.</param>
/// <param name="message">Text the status message is appended to.</param>
/// <returns>The populated <paramref name="o"/>.</returns>
public static ScrapedGameObjectWeb ScrapeGame(ScrapedGameObjectWeb o, ScraperOrder order, ProgressDialogController controller, ScraperMaster masterrecord, string message)
{
    bool isPrimary = (order == ScraperOrder.Primary);

    message = message + "Downloading information for: " + masterrecord.MobyData.MobyTitle + "\n(" + masterrecord.MobyData.MobyPlatformName + ")";
    message = (isPrimary ? "Primary Scraping (mobygames)\n" : "Secondary Scraping (mobygames)\n") + message;

    if (masterrecord.MobyData.MobyTitle == null)
    {
        // no moby data matched - use gamesdb title and platform and return
        o.Data.Title = masterrecord.TGDBData.GamesDBTitle;
        o.Data.Platform = masterrecord.TGDBData.GamesDBPlatformName;
        return (o);
    }

    // Show the status text in both modes. Previously the secondary message was
    // built but never displayed, because PullWebpageData does not use it.
    controller.SetMessage(message);

    if (isPrimary)
    {
        // only the primary scraper dictates title and platform
        o.Data.Title = masterrecord.MobyData.MobyTitle;
        o.Data.Platform = masterrecord.MobyData.MobyPlatformName;
    }

    // any non-primary order is treated as secondary
    return PullWebpageData(o, masterrecord, controller, isPrimary ? ScraperOrder.Primary : ScraperOrder.Secondary, message);
}
/// <summary>
/// Master handler for parsing data from the mobygames website.
/// Downloads the game's main page and fills the text fields of <paramref name="o"/>
/// (publisher, developer, release, ESRB, genres, overview, alternate titles), then -
/// depending on the global settings - the cover-art and screenshots pages.
/// In primary mode scraped values overwrite existing ones; otherwise only empty fields are filled.
/// </summary>
/// <param name="o">The scraped-game object to populate; the same instance is returned.</param>
/// <param name="masterrecord">Supplies the mobygames platform and game aliases used to build the URLs.</param>
/// <param name="controller">Progress dialog; polled for cancellation and closed when cancelled.</param>
/// <param name="order">Whether mobygames acts as the primary or the secondary scraper.</param>
/// <param name="message">Not used by this method; kept so existing callers compile unchanged.</param>
/// <returns>The (possibly partially) populated <paramref name="o"/>.</returns>
public static ScrapedGameObjectWeb PullWebpageData(ScrapedGameObjectWeb o, ScraperMaster masterrecord, ProgressDialogController controller, ScraperOrder order, string message)
{
    const string baseUrl = "http://www.mobygames.com/game/";
    string gamePath = masterrecord.MOBYPlatformAlias + "/" + masterrecord.MOBYAlias;
    bool isPrimary = (order == ScraperOrder.Primary);

    // query the main game page
    string initialPage = ReturnWebpage(baseUrl, gamePath, 10000);
    if (IsMobyErrorPage(initialPage))
    {
        return (o);
    }

    GlobalSettings gs = GlobalSettings.GetGlobals();

    HtmlDocument doc = new HtmlDocument();
    doc.LoadHtml(initialPage);

    // core information: a "heading" div is followed by the div holding its value
    List<HtmlNode> divs = SelectNodeList(doc.DocumentNode, "//div");
    for (int i = 0; i < divs.Count; i++)
    {
        if (controller.IsCanceled == true)
        {
            controller.CloseAsync();
            return (o);
        }

        HtmlNode heading = divs[i];

        // a heading has no nested markup; it also needs a following div to read the value from
        if (heading.InnerText != heading.InnerHtml || i + 1 >= divs.Count)
        {
            continue;
        }

        HtmlNode valueNode = divs[i + 1];
        switch (heading.InnerHtml)
        {
            case "Published by":
                if (isPrimary || o.Data.Publisher == null)
                {
                    o.Data.Publisher = WebUtility.HtmlDecode(valueNode.InnerText);
                }
                break;

            case "Developed by":
                if (isPrimary || o.Data.Developer == null)
                {
                    o.Data.Developer = WebUtility.HtmlDecode(valueNode.InnerText);
                }
                break;

            case "Released":
                if (isPrimary || o.Data.Released == null)
                {
                    o.Data.Released = WebUtility.HtmlDecode(valueNode.InnerText);
                }
                break;

            case "ESRB Rating":
                if (isPrimary || o.Data.ESRB == null)
                {
                    o.Data.ESRB = WebUtility.HtmlDecode(valueNode.InnerText);
                }
                break;

            case "Genre":
                if (o.Data.Genres == null)
                {
                    o.Data.Genres = new List<string>();
                }

                // Decide once, before adding anything: checking the count inside the
                // loop let only the first genre through in secondary mode.
                if (isPrimary || o.Data.Genres.Count == 0)
                {
                    foreach (string genre in WebUtility.HtmlDecode(valueNode.InnerText).Split('/'))
                    {
                        string trimmed = genre.Trim();
                        if (trimmed.Length > 0)
                        {
                            o.Data.Genres.Add(trimmed);
                        }
                    }
                }
                break;
        }
    }

    // get the game description: everything between the heading and the next "<div class=", tags stripped
    const string descriptionMarker = "<h2>Description</h2>";
    if (initialPage.Contains(descriptionMarker) && (isPrimary || o.Data.Overview == null))
    {
        string afterHeading = initialPage.Split(new string[] { descriptionMarker }, StringSplitOptions.None)[1];
        string rawDescription = afterHeading.Split(new string[] { "<div class=" }, StringSplitOptions.None)[0];
        o.Data.Overview = WebUtility.HtmlDecode(Regex.Replace(rawDescription, @"<[^>]*>", String.Empty));
    }

    // get alternate titles: the <li> entries of the first <ul> after the heading
    const string titlesMarker = "<h2>Alternate Titles</h2>";
    if (initialPage.Contains(titlesMarker) && (isPrimary || o.Data.AlternateTitles == null))
    {
        string afterHeading = initialPage.Split(new string[] { titlesMarker }, StringSplitOptions.None)[1];
        string flattened = afterHeading.Replace("\r", "").Replace("\n", "").Replace("<ul>", "");
        string listBody = flattened.Split(new string[] { "</ul>" }, StringSplitOptions.None)[0].Trim();
        string[] titles = listBody.Replace("<li>", "").Replace("</li>", "\n").Split('\n');

        if (o.Data.AlternateTitles == null)
        {
            o.Data.AlternateTitles = new List<string>();
        }

        foreach (string title in titles)
        {
            if (title != "")
            {
                o.Data.AlternateTitles.Add(WebUtility.HtmlDecode(title.Replace(" -- <em>Japanese spelling</em>", "")));
            }
        }
    }

    // cover art
    if (gs.scrapeBoxart == true || gs.scrapeMedia == true)
    {
        string coverPage = ReturnWebpage(baseUrl, gamePath + "/cover-art", 10000);
        if (!IsMobyErrorPage(coverPage) && !coverPage.Contains("There are no covers for the selected platform."))
        {
            if (!ScrapeMobyCovers(o, gs, controller, coverPage))
            {
                // cancelled by the user; the dialog has already been closed
                return (o);
            }
        }
    }

    // screenshots
    if (gs.scrapeScreenshots == true)
    {
        string screenPage = ReturnWebpage(baseUrl, gamePath + "/screenshots", 10000);
        if (!IsMobyErrorPage(screenPage) && !screenPage.Contains("There are no user screenshots on file"))
        {
            HtmlDocument sDoc = new HtmlDocument();
            sDoc.LoadHtml(screenPage);

            if (o.Screenshots == null)
            {
                o.Screenshots = new List<string>();
            }

            int added = 0;
            foreach (HtmlNode thumb in SelectNodeList(sDoc.DocumentNode, "//a[@class ='thumbnail-image']"))
            {
                if (added >= gs.maxScreenshots)
                {
                    break;
                }

                string url = BuildMobyImageUrl(thumb);
                if (url == null)
                {
                    continue;
                }

                o.Screenshots.Add(url);
                added++;
            }
        }
    }

    return (o);
}

/// <summary>
/// True when a ReturnWebpage result cannot be parsed: null/empty or one of its error markers.
/// </summary>
private static bool IsMobyErrorPage(string page)
{
    return string.IsNullOrEmpty(page)
        || page == "ERROR - Server Error"
        || page == "ERROR - Not Found";
}

/// <summary>
/// Runs an XPath query and always returns a list; SelectNodes yields null when nothing matches.
/// </summary>
private static List<HtmlNode> SelectNodeList(HtmlNode root, string xpath)
{
    IEnumerable<HtmlNode> found = root.SelectNodes(xpath);
    return found == null ? new List<HtmlNode>() : found.ToList();
}

/// <summary>
/// Builds the large-image URL from a thumbnail's inline background-image style;
/// returns null when the node has no style attribute.
/// </summary>
private static string BuildMobyImageUrl(HtmlNode thumbnail)
{
    var style = thumbnail.Attributes["style"];
    if (style == null || style.Value == null)
    {
        return null;
    }

    // strip the CSS wrapper and swap the small ("/s/") variant for the large ("/l/") one
    string path = style.Value.Replace(");", "").Replace("background-image:url(", "").Replace("/s/", "/l/");
    return "http://mobygames.com" + path;
}

/// <summary>
/// Returns a new single-item list when storing is enabled and nothing is stored yet;
/// otherwise returns the existing list unchanged.
/// </summary>
private static List<string> KeepOrStartWith(List<string> existing, bool enabled, string url)
{
    if (enabled && (existing == null || existing.Count == 0))
    {
        return new List<string> { url };
    }
    return existing;
}

/// <summary>
/// Picks the first front cover, back cover and media image from the cover-art page.
/// Returns false when the user cancelled (the dialog is closed before returning).
/// </summary>
private static bool ScrapeMobyCovers(ScrapedGameObjectWeb o, GlobalSettings gs, ProgressDialogController controller, string coverPage)
{
    HtmlDocument cDoc = new HtmlDocument();
    cDoc.LoadHtml(coverPage);

    // An XPath beginning with "//" is evaluated from the document root regardless of the
    // node it is called on, so one document-wide query per kind is all that is needed.
    // Captions and links are paired by position.
    List<HtmlNode> captions = SelectNodeList(cDoc.DocumentNode, "//div[@class ='thumbnail-cover-caption']");
    List<HtmlNode> links = SelectNodeList(cDoc.DocumentNode, "//a[@class ='thumbnail-cover']");
    int pairCount = Math.Min(captions.Count, links.Count);

    bool frontFound = false;
    bool backFound = false;
    bool mediaFound = false;

    for (int i = 0; i < pairCount; i++)
    {
        if (controller.IsCanceled == true)
        {
            controller.CloseAsync();
            return false;
        }

        string caption = captions[i].InnerText.Trim().ToLowerInvariant();
        string mediaUrl = BuildMobyImageUrl(links[i]);
        if (mediaUrl == null)
        {
            continue;
        }

        // only the first image of each kind is considered, and it never replaces stored ones
        if (!frontFound && caption == "front cover")
        {
            o.FrontCovers = KeepOrStartWith(o.FrontCovers, gs.scrapeBoxart == true, mediaUrl);
            frontFound = true;
        }

        if (!backFound && caption == "back cover")
        {
            o.BackCovers = KeepOrStartWith(o.BackCovers, gs.scrapeBoxart == true, mediaUrl);
            backFound = true;
        }

        if (!mediaFound && caption == "media")
        {
            o.Medias = KeepOrStartWith(o.Medias, gs.scrapeMedia == true, mediaUrl);
            mediaFound = true;
        }

        if (frontFound && backFound && mediaFound)
        {
            break;
        }
    }

    return true;
}
/// <summary>
/// Entry point for scraping a single game from thegamesdb.
/// In primary mode the fetched text data overwrites the fields of <paramref name="o"/> and
/// images are appended; in secondary mode only empty fields and empty image lists are filled
/// (screenshots are always appended) and the lists are de-duplicated afterwards.
/// </summary>
/// <param name="o">The scraped-game object to populate; its GdbId selects the game to fetch.</param>
/// <param name="order">Whether thegamesdb acts as the primary or the secondary scraper.</param>
/// <param name="controller">Progress dialog that receives the status message.</param>
/// <param name="masterrecord">Master record holding the matched thegamesdb title and platform.</param>
/// <param name="message">Text the status message is appended to.</param>
/// <returns>The populated <paramref name="o"/>; returned early when the game lookup yields nothing.</returns>
public static ScrapedGameObjectWeb ScrapeGame(ScrapedGameObjectWeb o, ScraperOrder order, ProgressDialogController controller, ScraperMaster masterrecord, string message)
{
    const string baseImgUrl = "http://thegamesdb.net/banners/";
    bool isPrimary = (order == ScraperOrder.Primary);

    message = message + "Downloading information for: " + masterrecord.TGDBData.GamesDBTitle + "\n(" + masterrecord.TGDBData.GamesDBPlatformName + ")";
    message = (isPrimary ? "Primary Scraping (thegamesdb)\n" : "Secondary Scraping (thegamesdb)\n") + message;

    GlobalSettings gs = GlobalSettings.GetGlobals();
    controller.SetMessage(message);

    if (isPrimary)
    {
        o.Data.Title = masterrecord.TGDBData.GamesDBTitle;
        o.Data.Platform = masterrecord.TGDBData.GamesDBPlatformName;
    }
    else
    {
        // GDB is secondary scraper - keep whatever the primary scraper set
        if (o.Data.Title == null)
        {
            o.Data.Title = masterrecord.TGDBData.GamesDBTitle;
        }
        if (o.Data.Platform == null)
        {
            o.Data.Platform = masterrecord.TGDBData.GamesDBPlatformName;
        }
    }

    // get the data from thegamesdb.net
    GDBNETGame g = GDBNETGamesDB.GetGame(o.GdbId);
    if (g == null)
    {
        // Nothing was returned
        return (o);
    }

    /* text data */
    if (isPrimary)
    {
        o.Data.AlternateTitles = g.AlternateTitles;
        o.Data.Coop = g.Coop;
        o.Data.Developer = g.Developer;
        o.Data.ESRB = g.ESRB;
        o.Data.Genres = g.Genres;
        o.Data.Overview = g.Overview;
        o.Data.Players = g.Players;
        o.Data.Publisher = g.Publisher;
        o.Data.Released = g.ReleaseDate;
    }
    else
    {
        // lists are copied so later edits to o do not touch the fetched game object
        if (o.Data.AlternateTitles == null && g.AlternateTitles != null && g.AlternateTitles.Count > 0)
        {
            o.Data.AlternateTitles = new List<string>();
            o.Data.AlternateTitles.AddRange(g.AlternateTitles);
        }
        if (o.Data.Genres == null && g.Genres != null)
        {
            o.Data.Genres = new List<string>();
            o.Data.Genres.AddRange(g.Genres);
        }
        if (o.Data.Coop == null) { o.Data.Coop = g.Coop; }
        if (o.Data.Developer == null) { o.Data.Developer = g.Developer; }
        if (o.Data.ESRB == null) { o.Data.ESRB = g.ESRB; }
        if (o.Data.Overview == null) { o.Data.Overview = g.Overview; }
        if (o.Data.Players == null) { o.Data.Players = g.Players; }
        if (o.Data.Publisher == null) { o.Data.Publisher = g.Publisher; }
        if (o.Data.Released == null) { o.Data.Released = g.ReleaseDate; }
    }

    /* images */
    // NOTE(review): o.Banners and o.FanArts are assumed to be initialised by the caller,
    // as in the original code - confirm against ScrapedGameObjectWeb.
    var images = g.Images;
    if (images != null)
    {
        if (gs.scrapeBoxart == true)
        {
            if (o.BackCovers == null) { o.BackCovers = new List<string>(); }
            if (o.FrontCovers == null) { o.FrontCovers = new List<string>(); }

            if (images.BoxartBack != null && (isPrimary || o.BackCovers.Count == 0))
            {
                o.BackCovers.Add(baseImgUrl + images.BoxartBack.Path);
            }
            if (images.BoxartFront != null && (isPrimary || o.FrontCovers.Count == 0))
            {
                o.FrontCovers.Add(baseImgUrl + images.BoxartFront.Path);
            }
        }

        if (gs.scrapeBanners == true && images.Banners != null && (isPrimary || o.Banners.Count == 0))
        {
            foreach (var banner in images.Banners)
            {
                o.Banners.Add(baseImgUrl + banner.Path);
            }
        }

        if (gs.scrapeFanart == true && images.Fanart != null && (isPrimary || o.FanArts.Count == 0))
        {
            int added = 0;
            foreach (var fanart in images.Fanart)
            {
                if (added >= gs.maxFanarts)
                {
                    break;
                }
                o.FanArts.Add(baseImgUrl + fanart.Path);
                added++;
            }
        }

        // screenshots are appended in both modes, even when some are already present
        if (gs.scrapeScreenshots == true && images.Screenshots != null)
        {
            if (o.Screenshots == null) { o.Screenshots = new List<string>(); }

            int added = 0;
            foreach (var screenshot in images.Screenshots)
            {
                if (added >= gs.maxScreenshots)
                {
                    break;
                }
                o.Screenshots.Add(baseImgUrl + screenshot.Path);
                added++;
            }
        }
    }

    if (!isPrimary)
    {
        // Remove duplicates. Calling Distinct() and discarding the result changes nothing
        // (and throws on a null list), so the lists are rewritten in place instead.
        // AlternateTitles is left untouched, as before.
        RemoveDuplicatesInPlace(o.Data.Genres);
        RemoveDuplicatesInPlace(o.Screenshots);
        RemoveDuplicatesInPlace(o.FanArts);
        RemoveDuplicatesInPlace(o.Banners);
        RemoveDuplicatesInPlace(o.BackCovers);
        RemoveDuplicatesInPlace(o.FrontCovers);
    }

    return (o);
}

/// <summary>
/// Drops repeated entries from a collection, keeping the first occurrence of each; null is ignored.
/// </summary>
private static void RemoveDuplicatesInPlace(ICollection<string> items)
{
    if (items == null || items.Count < 2)
    {
        return;
    }

    List<string> unique = items.Distinct().ToList();
    if (unique.Count == items.Count)
    {
        return; // nothing to remove
    }

    items.Clear();
    foreach (string item in unique)
    {
        items.Add(item);
    }
}