/// <summary>
/// Assembles a filename from a title, any number of optional subtype suffixes and an
/// extension, then passes the result through <c>Utility.GetSafeFilename</c>.
/// </summary>
/// <param name="title">Text the filename starts with.</param>
/// <param name="extension">Appended verbatim at the end, so it must already contain any leading dot.</param>
/// <param name="filenameSubtypes">
/// Suffixes appended in order, each preceded by an underscore. Entries that are null,
/// empty or whitespace-only are left out.
/// </param>
/// <returns>The value returned by <c>Utility.GetSafeFilename</c> for the assembled name.</returns>
public static string BuildFilename(string title, string extension, params string[] filenameSubtypes)
{
    StringBuilder nameBuilder = new StringBuilder();
    nameBuilder.Append(title);

    for (int i = 0; i < filenameSubtypes.Length; i++)
    {
        string subtype = filenameSubtypes[i];
        if (string.IsNullOrWhiteSpace(subtype))
        {
            continue;
        }

        nameBuilder.Append("_").Append(subtype);
    }

    nameBuilder.Append(extension);

    string assembledName = nameBuilder.ToString();
    return Utility.GetSafeFilename(assembledName);
}
/// <summary>
/// Requests <paramref name="URL"/> and saves the response body into a per-title subfolder of
/// <paramref name="savePathRoot"/>, unless a file with the computed name already exists there.
/// </summary>
/// <param name="title">Used (after <c>Utility.GetSafeFilename</c>) as the subfolder name and as the start of the filename.</param>
/// <param name="filenameSubtype">Optional suffix forwarded to <see cref="BuildFilename"/>.</param>
/// <param name="URL">Address to request.</param>
/// <param name="savePathRoot">Directory under which the per-title subfolder is created.</param>
/// <remarks>
/// Nothing is downloaded when <c>GetExtension</c> returns null for the response.
/// A <see cref="WebException"/> is logged and swallowed; other exception types propagate to the caller.
/// </remarks>
public static void GetFile(string title, string filenameSubtype, string URL, string savePathRoot)
{
    HttpWebRequest request = (HttpWebRequest)HttpWebRequest.Create(URL);
    try
    {
        using (HttpWebResponse response = (HttpWebResponse)request.GetResponse())
        {
            //build filename
            string extension = GetExtension(response);
            if (extension == null)
            {
                return;
            }

            string filename = BuildFilename(title, extension, filenameSubtype);

            //build savepath
            // Path.Combine inserts a separator only when the root lacks one, so the result is
            // correct whether or not savePathRoot ends with a slash. A null root keeps yielding
            // a relative path, as plain string concatenation did.
            // NOTE(review): assumes Utility.GetSafeFilename never returns a rooted path - confirm.
            string savePath = Path.Combine(savePathRoot ?? string.Empty, Utility.GetSafeFilename(title));
            string fullPath = Path.Combine(savePath, filename);

            if (File.Exists(fullPath))
            {
                Log("Skipping " + filename);
                return;
            }

            Log("Downloading " + filename);

            // CreateDirectory does nothing when the directory already exists,
            // so no prior Directory.Exists check is needed.
            Directory.CreateDirectory(savePath);

            using (Stream responseStream = response.GetResponseStream())
            using (var fileStream = File.Create(fullPath))
            {
                responseStream.CopyTo(fileStream);
            }

            // Pause only after an actual download; skipped files cost the server nothing.
            if (DoCourtesyWait)
            {
                System.Threading.Thread.Sleep(CourtestWaitTime);
            }
        }
    }
    catch (WebException ex)
    {
        Log("Encountered error fetching file: " + ex.Message);
    }
}
//public static readonly int TotalPages = 141;

/// <summary>
/// Walks the paginated game listing at <c>Site</c>, opens every game page found on each
/// listing page and downloads each linked "extra" into a per-game folder under <c>SavePathRoot</c>.
/// </summary>
/// <param name="args">Command-line arguments; not used.</param>
/// <remarks>
/// Listing pages are requested with increasing page numbers until one comes back titled
/// "404 Not Found". A failed download (<see cref="WebException"/>) or a response without a
/// usable Content-Disposition header is reported through <see cref="Debug"/> and skipped, so
/// a single bad extra does not stop the run.
/// </remarks>
static void Main(string[] args)
{
    HtmlWeb web = new HtmlWeb();
    List<string> gamePageURLs = new List<string>();
    List<HtmlNode> extrasLinks = new List<HtmlNode>();

    //iterate through each page of games; the loop is left via the 404 check below
    for (int page = 1; page > 0; page++)
    {
        Debug.WriteLine("Scraping Listing Page " + (page));
        HtmlNode rawPageNode = web.Load(Site + page).DocumentNode;

        //Reloaded returns 404 when you ask for a page after the final page
        if (rawPageNode.ChildNodes["html"].ChildNodes["head"].ChildNodes["title"].InnerHtml == "404 Not Found")
        {
            break;
        }

        gamePageURLs.AddRange(BuildGameNodeList(rawPageNode.Descendants(1)));

        int gameCount = 1;
        foreach (string URL in gamePageURLs)
        {
            Debug.WriteLine("Scraping Page #" + (page) + " Game #" + gameCount);
            gameCount++;

            HtmlNode gamePage = web.Load(RootDownloadURL + URL).DocumentNode;

            //find extras and add to list
            foreach (HtmlNode gameNode in gamePage.Descendants(1))
            {
                if (gameNode.Name == "a" && gameNode.OuterHtml.Contains("ExtraID"))
                {
                    extrasLinks.Add(gameNode);
                }
            }

            if (Wait)
            {
                System.Threading.Thread.Sleep(500);
            }

            //for each extra, convert to link and metadata and download
            foreach (HtmlNode extrasNode in extrasLinks)
            {
                // NOTE(review): takes the first double-quoted value in the anchor's markup,
                // presumably its href - confirm against the site's HTML.
                string downloadUrl = RootDownloadURL + extrasNode.OuterHtml.Split('"')[1];

                //build filename
                string gameTitle = extrasNode.OwnerDocument.DocumentNode.ChildNodes["html"].ChildNodes["head"].ChildNodes["title"].InnerHtml.Replace(" @ Reloaded.org", "").Trim();
                string extraType = WebUtility.HtmlDecode(extrasNode.InnerText).Replace('|', '-').Trim();
                string filename = Utility.GetSafeFilename(gameTitle + "_" + extraType);

                //build savepath
                // Path.Combine adds a separator only when SavePathRoot lacks one.
                string savePath = Path.Combine(SavePathRoot ?? string.Empty, Utility.GetSafeFilename(gameTitle));

                try
                {
                    HttpWebRequest request = (HttpWebRequest)HttpWebRequest.Create(downloadUrl);
                    using (HttpWebResponse response = (HttpWebResponse)request.GetResponse())
                    {
                        //Have to determine extension after the fact, from the response headers
                        string extension = ExtensionFromContentDisposition(response.Headers["Content-Disposition"]);
                        if (extension == null)
                        {
                            Debug.WriteLine("Skipping " + filename + " (no usable Content-Disposition header)");
                            continue;
                        }

                        string fullPath = Path.Combine(savePath, filename + extension);
                        if (File.Exists(fullPath))
                        {
                            Debug.WriteLine("Skipping " + filename + extension);
                            continue;
                        }

                        Debug.WriteLine("Downloading " + filename + extension);

                        // No-op when the directory already exists.
                        Directory.CreateDirectory(savePath);

                        using (Stream responseStream = response.GetResponseStream())
                        using (var fileStream = File.Create(fullPath))
                        {
                            responseStream.CopyTo(fileStream);
                        }
                    }
                }
                catch (WebException ex)
                {
                    // One unreachable extra must not abort the whole scrape.
                    Debug.WriteLine("Encountered error fetching file: " + ex.Message);
                }
            }

            extrasLinks.Clear();
        }

        gamePageURLs.Clear();

        if (Wait)
        {
            System.Threading.Thread.Sleep(500);
        }
    }
}

// Extracts the file extension from a Content-Disposition header value; returns null when no filename can be read from it.
private static string ExtensionFromContentDisposition(string contentDisposition)
{
    if (string.IsNullOrEmpty(contentDisposition))
    {
        return null;
    }

    int equalsIndex = contentDisposition.IndexOf('=');
    if (equalsIndex < 0)
    {
        return null;
    }

    // Keep only the value of the first parameter: drop anything from the next ';' on,
    // then surrounding whitespace and optional double quotes.
    string contentName = contentDisposition.Substring(equalsIndex + 1);
    int semicolonIndex = contentName.IndexOf(';');
    if (semicolonIndex >= 0)
    {
        contentName = contentName.Substring(0, semicolonIndex);
    }

    contentName = contentName.Trim().Trim('"');

    // Path.GetExtension can throw on characters that are invalid in paths, so reject those up front.
    if (contentName.Length == 0 || contentName.IndexOfAny(Path.GetInvalidPathChars()) >= 0)
    {
        return null;
    }

    // May be an empty string when the name has no extension; the caller then saves without one.
    return Path.GetExtension(contentName);
}