Beispiel #1
0
        /// <summary>
        /// Assembles a filename from a title, any number of optional subtype parts and an extension,
        /// then passes the result through <c>Utility.GetSafeFilename</c>.
        /// </summary>
        /// <param name="title">Leading part of the filename.</param>
        /// <param name="extension">Text appended verbatim at the end (no separator is inserted).</param>
        /// <param name="filenameSubtypes">
        /// Optional parts, each appended as "_" + part. Null, empty or whitespace-only parts are left out.
        /// </param>
        /// <returns>The value returned by <c>Utility.GetSafeFilename</c> for the assembled name.</returns>
        public static string BuildFilename(string title, string extension, params string[] filenameSubtypes)
        {
            StringBuilder name = new StringBuilder();
            name.Append(title);

            for (int i = 0; i < filenameSubtypes.Length; i++)
            {
                string subtype = filenameSubtypes[i];

                // Blank parts would only contribute a dangling underscore, so they are skipped.
                if (string.IsNullOrWhiteSpace(subtype))
                {
                    continue;
                }

                name.Append('_').Append(subtype);
            }

            name.Append(extension);

            string assembled = name.ToString();
            return Utility.GetSafeFilename(assembled);
        }
Beispiel #2
0
        /// <summary>
        /// Downloads the resource at <paramref name="URL"/> into a per-title subfolder of
        /// <paramref name="savePathRoot"/>, unless a file with the computed name is already there.
        /// </summary>
        /// <param name="title">Title used for both the subfolder name and the start of the filename.</param>
        /// <param name="filenameSubtype">Optional filename part passed on to <c>BuildFilename</c>.</param>
        /// <param name="URL">Address to request.</param>
        /// <param name="savePathRoot">Folder under which the per-title subfolder is created.</param>
        /// <remarks>
        /// A <see cref="WebException"/> raised while fetching is logged and swallowed; other exceptions
        /// propagate. Nothing is downloaded when <c>GetExtension</c> returns null for the response.
        /// </remarks>
        public static void GetFile(string title, string filenameSubtype, string URL, string savePathRoot)
        {
            HttpWebRequest request = (HttpWebRequest)HttpWebRequest.Create(URL);

            try {
                using (HttpWebResponse response = (HttpWebResponse)request.GetResponse()) {
                    //build filename
                    string extension = GetExtension(response);
                    if (extension == null)
                    {
                        return;
                    }

                    string filename = BuildFilename(title, extension, filenameSubtype);

                    //build savepath; Path.Combine inserts the separator only when the root lacks one
                    string savePath = Path.Combine(savePathRoot ?? string.Empty, Utility.GetSafeFilename(title));
                    string fullPath = Path.Combine(savePath, filename);

                    if (File.Exists(fullPath))
                    {
                        Log("Skipping " + filename);
                        return;
                    }

                    Log("Downloading " + filename);

                    // CreateDirectory does nothing when the directory already exists.
                    Directory.CreateDirectory(savePath);

                    bool completed = false;
                    try
                    {
                        using (var responseStream = response.GetResponseStream())
                        using (var fileStream = File.Create(fullPath))
                        {
                            responseStream.CopyTo(fileStream);
                        }
                        completed = true;
                    }
                    finally
                    {
                        // A truncated file would pass the File.Exists check above on the next run
                        // and never be re-downloaded, so remove it when the copy did not finish.
                        if (!completed && File.Exists(fullPath))
                        {
                            File.Delete(fullPath);
                        }
                    }

                    if (DoCourtesyWait)
                    {
                        System.Threading.Thread.Sleep(CourtestWaitTime);
                    }
                }
            } catch (WebException ex) {
                Log("Encountered error fetching file: " + ex.Message);
            }
        }
        //public static readonly int TotalPages = 141;

        /// <summary>
        /// Entry point. Loads the numbered listing pages under <c>Site</c> until the "404 Not Found"
        /// page comes back, loads every game page URL returned by <c>BuildGameNodeList</c>, and downloads
        /// each anchor whose markup contains "ExtraID" into a per-game folder below <c>SavePathRoot</c>.
        /// </summary>
        /// <param name="args">Command-line arguments; not used.</param>
        static void Main(string[] args)
        {
            HtmlWeb         web          = new HtmlWeb();
            List <string>   gamePageURLs = new List <string>();
            List <HtmlNode> extrasLinks  = new List <HtmlNode>();
            int             gameCount    = 0;

            //iterate through each page of games; the loop is left through the 404 check below
            for (int page = 1; page > 0; page++)
            {
                Debug.WriteLine("Scraping Listing Page " + (page));
                HtmlNode rawPageNode = web.Load(Site + page).DocumentNode;

                //Reloaded returns 404 when you ask for a page after the final page
                if (rawPageNode.ChildNodes["html"].ChildNodes["head"].ChildNodes["title"].InnerHtml == "404 Not Found")
                {
                    break;
                }

                gamePageURLs.AddRange(BuildGameNodeList(rawPageNode.Descendants(1)));

                gameCount = 1;
                foreach (string URL in gamePageURLs)
                {
                    Debug.WriteLine("Scraping Page #" + (page) + " Game #" + gameCount); gameCount++;
                    HtmlNode gamePage = web.Load(RootDownloadURL + URL).DocumentNode;

                    //find extras and add to list
                    foreach (HtmlNode gameNode in gamePage.Descendants(1))
                    {
                        if (gameNode.Name == "a" && gameNode.OuterHtml.Contains("ExtraID"))
                        {
                            extrasLinks.Add(gameNode);
                        }
                    }
                    if (Wait)
                    {
                        System.Threading.Thread.Sleep(500);
                    }

                    //for each extra, convert to link and metadata and download
                    foreach (HtmlNode extrasNode in extrasLinks)
                    {
                        //the link target is taken to be the first double-quoted value in the anchor markup
                        string downloadUrl = RootDownloadURL + extrasNode.OuterHtml.Split('"')[1];

                        //build filename; the extension is only known once the response headers arrive
                        string gameTitle = extrasNode.OwnerDocument.DocumentNode.ChildNodes["html"].ChildNodes["head"].ChildNodes["title"].InnerHtml.Replace(" @  Reloaded.org", "").Trim();
                        string extraType = WebUtility.HtmlDecode(extrasNode.InnerText).Replace('|', '-').Trim();
                        string filename = Utility.GetSafeFilename(gameTitle + "_" + extraType);

                        //build savepath
                        string subfolder = Utility.GetSafeFilename(gameTitle) + "\\";
                        string savePath  = SavePathRoot + subfolder;

                        if (!Directory.Exists(savePath))
                        {
                            Directory.CreateDirectory(savePath);
                        }

                        try
                        {
                            HttpWebRequest request = (HttpWebRequest)HttpWebRequest.Create(downloadUrl);
                            using (HttpWebResponse response = (HttpWebResponse)request.GetResponse())
                            {
                                string extension = ExtensionFromContentDisposition(response.Headers["Content-Disposition"]);
                                if (extension == null)
                                {
                                    //without the header there is no way to name the file correctly
                                    Debug.WriteLine("Skipping " + filename + " (no usable Content-Disposition header)");
                                    continue;
                                }

                                string targetPath = Path.Combine(savePath, filename + extension);
                                if (File.Exists(targetPath))
                                {
                                    Debug.WriteLine("Skipping " + filename + extension);
                                    continue;
                                }

                                Debug.WriteLine("Downloading " + filename + extension);

                                bool completed = false;
                                try
                                {
                                    using (var responseStream = response.GetResponseStream())
                                    using (var fileStream = File.Create(targetPath))
                                    {
                                        responseStream.CopyTo(fileStream);
                                    }
                                    completed = true;
                                }
                                finally
                                {
                                    //a truncated file would be treated as already downloaded on the next run
                                    if (!completed && File.Exists(targetPath))
                                    {
                                        File.Delete(targetPath);
                                    }
                                }
                            }
                        }
                        catch (WebException ex)
                        {
                            //one failed extra should not abort the whole scrape
                            Debug.WriteLine("Encountered error fetching " + downloadUrl + ": " + ex.Message);
                        }
                    }

                    extrasLinks.Clear();
                }
                gamePageURLs.Clear();
                if (Wait)
                {
                    System.Threading.Thread.Sleep(500);
                }
            }
        }

        /// <summary>
        /// Derives a file extension from a Content-Disposition header value by taking the text after
        /// the first '=' up to the next ';', stripping surrounding whitespace and double quotes, and
        /// passing it to <see cref="Path.GetExtension(string)"/>.
        /// </summary>
        /// <param name="contentDisposition">Raw header value; may be null when the header is absent.</param>
        /// <returns>
        /// The extension including the leading dot, an empty string when the name has no extension,
        /// or null when the header is missing or contains no '='.
        /// </returns>
        private static string ExtensionFromContentDisposition(string contentDisposition)
        {
            if (string.IsNullOrEmpty(contentDisposition))
            {
                return null;
            }

            int equalsIndex = contentDisposition.IndexOf('=');
            if (equalsIndex < 0)
            {
                return null;
            }

            string contentName = contentDisposition.Substring(equalsIndex + 1);

            //drop a trailing ';' or any further parameters that follow the name
            int semicolonIndex = contentName.IndexOf(';');
            if (semicolonIndex >= 0)
            {
                contentName = contentName.Substring(0, semicolonIndex);
            }

            //the value may be sent as a quoted string; quotes are not valid in a filename
            contentName = contentName.Trim().Trim('"');

            return Path.GetExtension(contentName);
        }