/// <summary>
/// Downloads the thread into <c>SaveTo</c>: creates the directory when it is missing,
/// saves an HTML copy via <c>downloadHTMLPage()</c> when the <c>loadHTML</c> setting is
/// enabled, then passes every URL returned by <c>getLinks()</c> to
/// <c>General.DownloadToDir</c>.
/// </summary>
/// <remarks>
/// A <see cref="WebException"/> whose status is
/// <see cref="WebExceptionStatus.ProtocolError"/> sets <c>Gone</c> to <c>true</c>;
/// any other <see cref="WebException"/> is swallowed without further action.
/// </remarks>
/// <exception cref="UnauthorizedAccessException">
/// Rethrown after the message has been shown to the user in a message box.
/// </exception>
public override void download()
{
    try
    {
        if (!Directory.Exists(SaveTo))
        {
            Directory.CreateDirectory(SaveTo);
        }

        if (Properties.Settings.Default.loadHTML)
        {
            downloadHTMLPage();
        }

        string[] urls = getLinks();
        foreach (string url in urls)
        {
            General.DownloadToDir(url, SaveTo);
        }
    }
    catch (WebException webEx)
    {
        // Named enum member instead of the former magic number 7 (same value).
        if (webEx.Status == WebExceptionStatus.ProtocolError)
        {
            Gone = true;
        }
    }
    catch (UnauthorizedAccessException ex)
    {
        MessageBox.Show(ex.Message, "No Permission to access folder");
        throw;
    }
}
/// <summary>
/// Saves a copy of the thread page as <c>Thread.html</c> inside <c>SaveTo</c>.
/// </summary>
/// <remarks>
/// The page HTML and its JSON counterpart (same URL with <c>.html</c> replaced by
/// <c>.json</c>) are downloaded. For every file entry found in the JSON, the 8ch.net
/// file-store URLs in the HTML are rewritten to relative paths and the thumbnail URL is
/// queued. Remaining root-relative attribute values are made absolute, the queued
/// thumbnails are handed to <c>General.DownloadToDir</c> with the <c>thumb</c> subfolder,
/// and the HTML is written unless it is empty or whitespace.
/// Exceptions from the web requests, JSON/XML parsing or file access propagate to the caller.
/// </remarks>
private void downloadHTMLPage()
{
    string threadUrl = getURL();
    List<string> thumbs = new List<string>();
    string htmlPage;
    string xmlText;

    // WebClient is IDisposable; a single instance serves both requests and is released afterwards.
    using (WebClient web = new WebClient())
    {
        // Fallback decoding for responses that do not declare a charset
        // (the default would be the system ANSI code page).
        web.Encoding = Encoding.UTF8;

        htmlPage = web.DownloadString(threadUrl);
        string json = web.DownloadString(threadUrl.Replace(".html", ".json"));

        // UTF-8 keeps non-ASCII characters intact; ASCII would turn them into '?'.
        byte[] jsonBytes = Encoding.UTF8.GetBytes(json);
        using (var stream = new MemoryStream(jsonBytes))
        {
            var quotas = new XmlDictionaryReaderQuotas();
            var jsonReader = JsonReaderWriterFactory.CreateJsonReader(stream, quotas);
            xmlText = XDocument.Load(jsonReader).ToString();
        }
    }

    XmlDocument doc = new XmlDocument();
    doc.LoadXml(xmlText);

    // First the single/first image of each post, then the extra images of multi-image posts.
    string[] fileItemPaths = { "/root/posts/item", "/root/posts/item/extra_files/item" };
    foreach (string itemPath in fileItemPaths)
    {
        // Read tim and ext from the same item so the two values can never be mis-paired;
        // items lacking either value are skipped.
        foreach (XmlNode fileItem in doc.SelectNodes(itemPath + "[tim and ext]"))
        {
            // NOTE(review): the original carried disabled code mapping ".webm" to ".jpg" here;
            // thumbnails of .webm files may need a different extension - confirm.
            string fileName = fileItem["tim"].InnerText + fileItem["ext"].InnerText;
            string localThumb = "thumb/" + fileName;

            thumbs.Add("https://8ch.net/file_store/thumb/" + fileName);

            htmlPage = htmlPage.Replace("https://8ch.net/file_store/thumb/" + fileName, localThumb);
            htmlPage = htmlPage.Replace("=\"/file_store/thumb/" + fileName, "=\"" + localThumb);
            htmlPage = htmlPage.Replace("=\"/file_store/" + fileName, "=\"" + fileName);
            htmlPage = htmlPage.Replace("https://media.8ch.net/file_store/thumb/" + fileName, localThumb);
            htmlPage = htmlPage.Replace("https://media.8ch.net/file_store/" + fileName, fileName);
            htmlPage = htmlPage.Replace("https://8ch.net/file_store/" + fileName, fileName);
        }
    }

    // Whatever is still root-relative must point back at the site.
    htmlPage = htmlPage.Replace("=\"/", "=\"https://8ch.net/");

    foreach (string thumbUrl in thumbs)
    {
        General.DownloadToDir(thumbUrl, SaveTo + "\\thumb");
    }

    if (!String.IsNullOrWhiteSpace(htmlPage))
    {
        File.WriteAllText(SaveTo + "\\Thread.html", htmlPage); // save thread
    }
}
/// <summary>
/// Saves a copy of the thread page as <c>Thread.html</c> inside <c>SaveTo</c>, with image
/// links rewritten to local file names.
/// </summary>
/// <remarks>
/// The board name and thread number are taken from segments 3 and 5 of <c>getURL()</c>
/// split on '/'. The page HTML and the thread JSON from <c>a.4cdn.org</c> are downloaded.
/// For each post that has a file, links on <c>i.4cdn.org</c> / <c>is2.4chan.org</c> are
/// replaced by the original file name (prefixed with '_' until it is unique within the
/// thread). For <c>.webm</c> files the thumbnail on <c>t.4cdn.org</c> is queued and its
/// link is pointed at the <c>thumb</c> subfolder; for other files the thumbnail link is
/// replaced by the URL-encoded file name. Finally the remaining protocol-relative attribute
/// values get an <c>http:</c> scheme, the queued thumbnails are handed to
/// <c>General.DownloadToDir</c>, and the HTML is written unless it is empty or whitespace.
/// Exceptions from the web requests, JSON/XML parsing or file access propagate to the caller.
/// </remarks>
private void downloadHTMLPage()
{
    const string userAgent = "Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:47.0) Gecko/20100101 Firefox/47.0";

    string threadUrl = this.getURL();
    string[] urlParts = threadUrl.Split('/');
    string boardNameSplit = urlParts[3];
    string threadNumberSplit = urlParts[5];
    string baseURL1 = "//i.4cdn.org/" + boardNameSplit + "/";
    string baseURL2 = "//is2.4chan.org/" + boardNameSplit + "/";
    string JURL = "http://a.4cdn.org/" + boardNameSplit + "/thread/" + threadNumberSplit + ".json";

    List<string> thumbs = new List<string>();
    // Hash set gives O(1) duplicate checks; default comparer matches the former List.Contains.
    HashSet<string> usedFileNames = new HashSet<string>();
    string htmlPage;
    string xmlString;

    // WebClient is IDisposable; release it as soon as both downloads are done.
    using (WebClient web = new WebClient())
    {
        // Fallback decoding for responses that do not declare a charset
        // (the default would be the system ANSI code page).
        web.Encoding = Encoding.UTF8;

        //Add a UserAgent to prevent 403
        web.Headers["User-Agent"] = userAgent;
        htmlPage = web.DownloadString(threadUrl);

        // WebClient can drop restricted headers such as User-Agent once a request has been
        // sent, so set it again before the client is reused.
        web.Headers["User-Agent"] = userAgent;
        string json = web.DownloadString(JURL);

        // UTF-8 keeps non-ASCII file names intact; ASCII would turn them into '?'.
        byte[] bytes = Encoding.UTF8.GetBytes(json);
        using (var stream = new MemoryStream(bytes))
        {
            var quotas = new XmlDictionaryReaderQuotas();
            var jsonReader = JsonReaderWriterFactory.CreateJsonReader(stream, quotas);
            xmlString = XDocument.Load(jsonReader).ToString();
        }
    }

    //Prevent the html from being destroyed by the anti adblock script
    htmlPage = htmlPage.Replace("f=\"to\"", "f=\"penis\"");

    //Normalize urls
    htmlPage = htmlPage.Replace("http:" + baseURL1, baseURL1);
    htmlPage = htmlPage.Replace("http:" + baseURL2, baseURL2);

    XmlDocument doc = new XmlDocument();
    doc.LoadXml(xmlString);

    // Read tim, filename and ext from the same post so the values can never be mis-paired;
    // posts lacking any of them are skipped.
    foreach (XmlNode post in doc.SelectNodes("/root/posts/item[tim and filename and ext]"))
    {
        string timestamp = post["tim"].InnerText;
        string extension = post["ext"].InnerText;

        string imageFileTime = timestamp + extension;
        string imageFileName = post["filename"].InnerText + extension;
        string imageURL1 = baseURL1 + imageFileTime;
        string imageURL2 = baseURL2 + imageFileTime;

        // Prefix with '_' until the name has not been used in this thread yet.
        while (!usedFileNames.Add(imageFileName))
        {
            imageFileName = "_" + imageFileName;
        }

        htmlPage = htmlPage.Replace(imageURL1, imageFileName);
        htmlPage = htmlPage.Replace(imageURL2, imageFileName);

        //Save thumbs for files that need it
        if (extension == ".webm")
        {
            string imageURL = "//t.4cdn.org/" + boardNameSplit + "/" + timestamp + "s.jpg";
            thumbs.Add("http:" + imageURL);

            htmlPage = htmlPage.Replace(baseURL1 + timestamp, "thumb/" + timestamp);
            htmlPage = htmlPage.Replace(baseURL2 + timestamp, "thumb/" + timestamp);
        }
        else
        {
            // No separate thumbnail is stored: the thumbnail link is pointed at the full file.
            string thumbName = imageFileTime.Split('.')[0] + "s" + ".jpg";
            string encodedName = System.Web.HttpUtility.UrlEncode(imageFileName);

            htmlPage = htmlPage.Replace(baseURL1 + thumbName, encodedName);
            htmlPage = htmlPage.Replace(baseURL2 + thumbName, encodedName);
        }

        htmlPage = htmlPage.Replace("/" + imageFileTime, imageFileName);
    }

    htmlPage = htmlPage.Replace("=\"//", "=\"http://");

    //Save thumbs for files that need it
    foreach (string thumbUrl in thumbs)
    {
        General.DownloadToDir(new FileInformation(thumbUrl), this.SaveTo + "\\thumb");
    }

    if (!string.IsNullOrWhiteSpace(htmlPage))
    {
        File.WriteAllText(this.SaveTo + "\\Thread.html", htmlPage);
    }
}
/// <summary>
/// Saves a copy of the thread page as <c>Thread.html</c> inside <c>SaveTo</c>, with image
/// links rewritten to local file names.
/// </summary>
/// <remarks>
/// The board name and thread number are taken from segments 3 and 5 of <c>getURL()</c>
/// split on '/'. The page HTML and the thread JSON from <c>a.4cdn.org</c> are downloaded.
/// For each post that has a file, links on <c>i.4cdn.org</c> / <c>is2.4chan.org</c> are
/// replaced by the local name (<c>tim</c> + <c>ext</c>). For <c>.webm</c> files the
/// thumbnail on <c>t.4cdn.org</c> is queued and its link is pointed at the <c>thumb</c>
/// subfolder; for other files the thumbnail link is pointed at the full file. Finally the
/// remaining protocol-relative attribute values get an <c>http:</c> scheme, the queued
/// thumbnails are handed to <c>General.DownloadToDir</c>, and the HTML is written unless
/// it is empty or whitespace.
/// Exceptions from the web requests, JSON/XML parsing or file access propagate to the caller.
/// </remarks>
private void downloadHTMLPage()
{
    const string userAgent = "Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:47.0) Gecko/20100101 Firefox/47.0";

    // Split the URL once instead of on every use inside the loop.
    string threadUrl = getURL();
    string[] urlParts = threadUrl.Split('/');
    string board = urlParts[3];
    string baseURL = "//i.4cdn.org/" + board + "/";
    string altBaseURL = "//is2.4chan.org/" + board + "/";
    string JURL = "http://a.4cdn.org/" + board + "/thread/" + urlParts[5] + ".json";

    List<string> thumbs = new List<string>();
    string htmlPage;
    string str;

    // WebClient is IDisposable; release it as soon as both downloads are done.
    using (WebClient web = new WebClient())
    {
        // Fallback decoding for responses that do not declare a charset
        // (the default would be the system ANSI code page).
        web.Encoding = Encoding.UTF8;

        //Add a UserAgent to prevent 403
        web.Headers["User-Agent"] = userAgent;
        htmlPage = web.DownloadString(threadUrl);

        // WebClient can drop restricted headers such as User-Agent once a request has been
        // sent, so set it again before the client is reused.
        web.Headers["User-Agent"] = userAgent;
        string json = web.DownloadString(JURL);

        // UTF-8 keeps non-ASCII characters intact; ASCII would turn them into '?'.
        byte[] bytes = Encoding.UTF8.GetBytes(json);
        using (var stream = new MemoryStream(bytes))
        {
            var quotas = new XmlDictionaryReaderQuotas();
            var jsonReader = JsonReaderWriterFactory.CreateJsonReader(stream, quotas);
            str = XDocument.Load(jsonReader).ToString();
        }
    }

    //Prevent the html from being destroyed by the anti adblock script
    htmlPage = htmlPage.Replace("f=\"to\"", "f=\"penis\"");

    XmlDocument doc = new XmlDocument();
    doc.LoadXml(str);

    // Read tim and ext from the same post so the two values can never be mis-paired;
    // posts lacking either value are skipped.
    foreach (XmlNode post in doc.SelectNodes("/root/posts/item[tim and ext]"))
    {
        string tim = post["tim"].InnerText;
        string ext = post["ext"].InnerText;

        string old = baseURL + tim + ext;
        string rep = tim + ext;
        htmlPage = htmlPage.Replace(old, rep);

        //Save thumbs for files that need it
        // Compare the extension directly; splitting the name on '.' threw when ext had no dot.
        if (ext == ".webm")
        {
            thumbs.Add("http:" + "//t.4cdn.org/" + board + "/" + tim + "s.jpg");
            htmlPage = htmlPage.Replace(baseURL + tim, "thumb/" + tim);
        }
        else
        {
            // No separate thumbnail is stored: the "<tim>s.jpg" link is pointed at the full file.
            string thumbName = tim + "s";
            htmlPage = htmlPage.Replace(thumbName + ".jpg", rep);
            htmlPage = htmlPage.Replace("/" + thumbName, thumbName);
            htmlPage = htmlPage.Replace(baseURL + tim, tim);
        }

        htmlPage = htmlPage.Replace(altBaseURL + tim, tim);
        htmlPage = htmlPage.Replace("/" + rep, rep);
    }

    htmlPage = htmlPage.Replace("=\"//", "=\"http://");

    //Save thumbs for files that need it
    foreach (string thumbUrl in thumbs)
    {
        General.DownloadToDir(thumbUrl, SaveTo + "\\thumb");
    }

    if (!string.IsNullOrWhiteSpace(htmlPage))
    {
        File.WriteAllText(SaveTo + "\\Thread.html", htmlPage);
    }
}