示例#1
0
        /// <summary>
        /// Builds the PSX game list from the three psxdatacenter.com list pages
        /// (ulist, plist, jlist), passes it to <c>PSX_Games.SaveToDatabase</c> and shows a
        /// message box with the "Games Added" / "Games Updated" counts it returns.
        /// </summary>
        /// <param name="downloadFiles">
        /// When true, each page is downloaded and written over its local copy before parsing.
        /// When false, the existing local copies are parsed as they are.
        /// </param>
        /// <returns>
        /// One record per serial number found in the pages. A table row listing several
        /// serials yields one record per serial, with " [Disc n]" appended to the name.
        /// </returns>
        public static List <PSX_Games> ScrapeInitialList(bool downloadFiles)
        {
            List <PSX_Games> list = new List <PSX_Games>();
            HtmlDocument     doc  = new HtmlDocument();

            doc.OptionFixNestedTags = true;

            string[] urls =
            {
                "http://psxdatacenter.com/ulist.html",
                "http://psxdatacenter.com/plist.html",
                "http://psxdatacenter.com/jlist.html",
            };

            // local copies, in the same order as urls
            string localDir = AppDomain.CurrentDomain.BaseDirectory + @"..\..\_Debug\DATDB\DATFiles\PSXDATACENTER\";
            string[] locals =
            {
                localDir + "ntscu.html",
                localDir + "pal.html",
                localDir + "ntscj.html",
            };

            if (downloadFiles)
            {
                // refresh every local copy with the latest version of its webpage
                using (var client = new WebClient())
                {
                    for (int i = 0; i < urls.Length; i++)
                    {
                        File.WriteAllText(locals[i], client.DownloadString(urls[i]));
                    }
                }
            }

            for (int i = 0; i < locals.Length; i++)
            {
                doc.LoadHtml(File.ReadAllText(locals[i]));

                // SelectNodes returns null (not an empty collection) when nothing matches
                var allTables = doc.DocumentNode.SelectNodes("//table");
                if (allTables == null)
                {
                    continue;
                }

                foreach (HtmlNode table in allTables)
                {
                    // only tables whose class attribute mentions "sectiontable" hold game rows
                    if (!table.Attributes.Contains("class") ||
                        !table.Attributes["class"].Value.Contains("sectiontable"))
                    {
                        continue;
                    }

                    var rows = table.SelectNodes("tr");
                    if (rows == null)
                    {
                        continue;
                    }

                    foreach (HtmlNode row in rows)
                    {
                        if (string.IsNullOrWhiteSpace(row.InnerText))
                        {
                            continue;
                        }

                        // cells used below: [0] info link, [1] serial(s), [2] name, [3] languages
                        var cellNodes = row.SelectNodes("th|td");
                        if (cellNodes == null)
                        {
                            continue;
                        }

                        HtmlNode[] cells = cellNodes.ToArray();
                        if (cells.Length < 4)
                        {
                            // malformed row - skip it instead of aborting the whole scrape
                            continue;
                        }

                        // Get all the serial numbers first (if no serial number present - ignore the row)
                        string[] serials = cells[1].InnerHtml
                                           .Split(new string[] { "<br>" }, StringSplitOptions.None)
                                           .Select(part => part.Trim())
                                           .Where(part => part != "")
                                           .ToArray();
                        if (serials.Length == 0)
                        {
                            continue;
                        }

                        // name, languages and info url are the same for every serial in the row
                        string gameName  = cells[2].InnerText.Replace("&nbsp;", "").Replace("\r\n", "").Trim().Replace("  ", " ");
                        string languages = cells[3].InnerText.Replace("\r\n", "").Replace("&nbsp;", "").Trim();

                        string infoUrl = null;
                        if (cells[0].InnerHtml != "")
                        {
                            string[] linkParts = cells[0].InnerHtml.Split(new string[] { "href=\"games/" }, StringSplitOptions.None);

                            // the cell may have content without a games/ link; leave infoUrl unset then
                            if (linkParts.Length > 1)
                            {
                                infoUrl = "http://psxdatacenter.com/games/" + linkParts[1].Replace("\">INFO</a>", "").Trim();
                            }
                        }

                        for (int sn = 0; sn < serials.Length; sn++)
                        {
                            PSX_Games record = new PSX_Games();
                            record.serial = serials[sn];

                            // several serials in one row are numbered as discs in listing order
                            record.name = serials.Length > 1
                                          ? gameName + " [Disc " + (sn + 1) + "]"
                                          : gameName;

                            record.languages = languages;

                            if (infoUrl != null)
                            {
                                record.infoUrl = infoUrl;
                            }

                            // region from the serial prefix; later checks deliberately override earlier ones
                            if (record.serial.Contains("SLPS") ||
                                record.serial.Contains("SCPS") ||
                                record.serial.Contains("SLPM") ||
                                record.serial.Contains("SIPS"))
                            {
                                record.region = "JAP";
                            }

                            if (record.serial.Contains("SLES") ||
                                record.serial.Contains("SCES"))
                            {
                                record.region = "EUR";
                            }

                            if (record.serial.Contains("SLUS") ||
                                record.serial.Contains("LSP") ||
                                record.serial.Contains("SCUS"))
                            {
                                record.region = "USA";
                            }

                            // add to master list
                            list.Add(record);
                        }
                    }
                }
            }

            // update in database
            int[] r = PSX_Games.SaveToDatabase(list);

            MessageBox.Show("Games Added: " + r[0] +
                            "\nGames Updated: " + r[1]);

            return(list);
        }
示例#2
0
        /// <summary>
        /// Runs <c>ScrapeIndividualInfo</c> for every stored game that has an info URL but is
        /// missing its publisher, developer or year, and saves each non-null result with
        /// <c>PSX_Games.SaveToDatabase</c>. A cancelable progress dialog is shown while it runs,
        /// followed by a "Linking Cancelled" or "Linking Completed" message.
        /// </summary>
        /// <remarks>
        /// This is an <c>async void</c> method: callers cannot await it, and an exception
        /// thrown inside it is not delivered to the caller.
        /// </remarks>
        public static async void GetExtraDetail()
        {
            PsxDc c = new PsxDc();

            var mySettings = new MetroDialogSettings()
            {
                NegativeButtonText = "Cancel",
                AnimateShow        = false,
                AnimateHide        = false
            };
            string output     = "Getting games that need additional info scraped...\n";
            var    controller = await c.mw.ShowProgressAsync("PSXDATACENTER Builder", output, true, settings : mySettings);

            controller.SetCancelable(true);
            controller.SetIndeterminate();
            await Task.Delay(1);

            await Task.Run(() =>
            {
                // load games that still lack at least one of the scraped fields
                List <PSX_Games> games = PSX_Games.GetGames()
                                         .Where(
                    a => a.infoUrl != null && a.infoUrl.Trim() != "" &&
                    (
                        a.publisher == null ||
                        a.developer == null ||
                        a.year == null
                    )
                    )
                                         .OrderBy(x => x.id)
                                         .ToList();

                int totalGamesToProcess = games.Count;

                string header = "Performing lookups...\n";

                // iterate through each game and get extra details
                for (int i = 0; i < totalGamesToProcess; i++)
                {
                    // honor the Cancel button; without this check the loop ran to the end regardless
                    if (controller.IsCanceled)
                    {
                        break;
                    }

                    // 1-based position so the text runs from "Game 1 of N" to "Game N of N"
                    controller.SetMessage(header + "\nGame " + (i + 1) + " of " + totalGamesToProcess);

                    var re = ScrapeIndividualInfo(games[i]);
                    if (re == null)
                    {
                        continue;
                    }

                    // add/update the database
                    PSX_Games.SaveToDatabase(re);
                }
            });

            await controller.CloseAsync();

            if (controller.IsCanceled)
            {
                await c.mw.ShowMessageAsync("DAT Builder", "Linking Cancelled");
            }
            else
            {
                await c.mw.ShowMessageAsync("DAT Builder", "Linking Completed");
            }
        }