/// <summary>
/// Downloads every image for <paramref name="cam"/> that is listed on the raw-image
/// site but not yet present in the database.
/// </summary>
/// <remarks>
/// The index page is loaded into <c>basePage</c> only when that member is still null.
/// Within the index, the container whose second child node's text contains
/// <c>cam.CamContainer</c> is selected, and each of its links whose inner HTML starts
/// with "Sol" is processed in parallel. Failures are written to the console and
/// enqueued on <c>errorCollection</c>; they do not stop the remaining work.
/// </remarks>
/// <param name="cam">Camera whose container and images should be processed.</param>
private static void DownloadImages(Cam cam)
{
    // These counters are mutated from Parallel.ForEach worker threads, so every
    // increment goes through Interlocked; a plain ++ can lose updates under contention.
    int solsProcessed = 0;
    int imagesProcessed = 0;
    int imagesDownloaded = 0;

    if (basePage == null)
    {
        Console.Clear();
        Console.WriteLine(String.Format("Attempting to contact {0}", MslCamConstants.RawImageUrl));
        HtmlWeb baseWeb = new HtmlWeb();
        basePage = TryLoadDoc(MslCamConstants.RawImageUrl, baseWeb);
    }

    HtmlNodeCollection baseContainers = basePage.DocumentNode.SelectNodes(@"//div[@class='image_set_container']");
    if (baseContainers == null)
    {
        // SelectNodes yields null (not an empty collection) when nothing matches.
        return;
    }

    foreach (HtmlNode container in baseContainers)
    {
        if (!container.ChildNodes[1].InnerText.Contains(cam.CamContainer))
        {
            continue;
        }

        Console.Clear();
        Console.WriteLine(String.Format("Attempting to download new images for {0}...", cam.CamName));

        HtmlNodeCollection baseSolLinks = container.SelectNodes(@".//a[starts-with(@href,'./?s=')]");
        if (baseSolLinks == null)
        {
            // No sol links in this container; Parallel.ForEach would reject a null source.
            continue;
        }

        Parallel.ForEach(baseSolLinks, solLink =>
        {
            if (!solLink.InnerHtml.StartsWith("Sol"))
            {
                return;
            }

            // The sol number is the second line of the link's inner HTML.
            string[] solTitles = solLink.InnerHtml.Split(new char[] { '\n' });
            int solNumber = int.Parse(solTitles[1]);
            // Drop the leading '.' of the relative href before appending it to the base URL.
            string solUrl = solLink.Attributes["href"].Value.Substring(1);

            try
            {
                HtmlWeb solWeb = new HtmlWeb();
                HtmlDocument solDoc = TryLoadDoc(String.Format("{0}{1}", MslCamConstants.RawImageUrl, solUrl), solWeb);

                List<HtmlNode> solContainers = new List<HtmlNode>();
                HtmlNode solStrong = solDoc.DocumentNode.SelectSingleNode(@"/html[1]/body[1]/div[1]/div[1]/div[1]/div[3]/table[1]/tr[2]/td[1]/div[2]/table[1]/tr[4]");
                if (solStrong != null && solStrong.InnerText.Contains(MslCamConstants.FullDataProductName))
                {
                    // Walk the sibling rows and stop at the first row that mentions a
                    // "Data Product" other than the full data product.
                    foreach (HtmlNode tr in solStrong.ParentNode.ChildNodes)
                    {
                        if (!tr.InnerText.StartsWith(MslCamConstants.FullDataProductName) && tr.InnerText.Contains("Data Product"))
                        {
                            break;
                        }

                        HtmlNodeCollection imgDataDivs = tr.SelectNodes(@".//div[@class='RawImageCaption']");
                        if (imgDataDivs != null)
                        {
                            solContainers.AddRange(imgDataDivs);
                        }
                    }
                }

                Parallel.ForEach(solContainers, solImage =>
                {
                    // Caption text splits into the camera name followed by the timestamp.
                    // A failure here escapes this lambda, is aggregated by Parallel.ForEach
                    // and ends up in the per-sol catch below.
                    string[] imgTitles = solImage.InnerText.Split(new char[] { '\n', '&' }, StringSplitOptions.RemoveEmptyEntries);
                    string imgCamName = imgTitles[0];
                    DateTime imgTimeStamp = DateTime.Parse(imgTitles[1]);

                    if (!imgCamName.Contains(cam.CamName))
                    {
                        return;
                    }

                    try
                    {
                        using (MSLScraperEntities mslContext = new MSLScraperEntities())
                        {
                            bool alreadyStored = mslContext.SolImageData.Any(x => x.Cam == imgCamName && x.Sol == solNumber && x.TimeStamp == imgTimeStamp);
                            if (!alreadyStored)
                            {
                                HtmlNode imgFullLinkNode = solDoc.DocumentNode.SelectSingleNode(solImage.XPath + @"/nobr[1]/div[1]/a[1]");
                                string imgFullLinkUrl = imgFullLinkNode.Attributes["href"].Value;

                                SolImageData newImageData = new SolImageData();
                                newImageData.Sol = solNumber;
                                newImageData.Cam = imgCamName;
                                newImageData.TimeStamp = imgTimeStamp;
                                newImageData.ImageUrl = imgFullLinkUrl;

                                Image bitmap = TryLoadImage(imgFullLinkUrl);
                                if (bitmap != null)
                                {
                                    // using guarantees the image is released even when Save throws.
                                    using (bitmap)
                                    using (MemoryStream stream = new MemoryStream())
                                    {
                                        bitmap.Save(stream, System.Drawing.Imaging.ImageFormat.Bmp);
                                        // ToArray copies the whole buffer regardless of Position.
                                        newImageData.ImageData = stream.ToArray();
                                    }

                                    // Re-check after the download: another worker may have
                                    // stored the same image in the meantime.
                                    bool storedMeanwhile = mslContext.SolImageData.Any(x => x.Cam == newImageData.Cam && x.Sol == newImageData.Sol && x.TimeStamp == newImageData.TimeStamp);
                                    if (!storedMeanwhile)
                                    {
                                        mslContext.SolImageData.Add(newImageData);
                                        mslContext.SaveChanges();
                                        System.Threading.Interlocked.Increment(ref imagesDownloaded);
                                        WriteDownloadProgress(cam, solsProcessed, imagesProcessed, imagesDownloaded);
                                    }
                                }
                            }
                        }
                    }
                    catch (Exception ex)
                    {
                        string errorMessage = String.Format("ERROR while downloading {0} Sol {1} Timestamp {2}", cam.CamName, solNumber, imgTitles[1]);
                        Console.WriteLine(String.Format("{0}: {1}", errorMessage, ex.Message));
                        errorCollection.Enqueue(new Exception(errorMessage, ex));
                    }

                    System.Threading.Interlocked.Increment(ref imagesProcessed);
                    WriteDownloadProgress(cam, solsProcessed, imagesProcessed, imagesDownloaded);
                });
            }
            catch (Exception ex)
            {
                string errorMessage = String.Format("ERROR while downloading {0} Sol {1} images", cam.CamName, solNumber);
                Console.WriteLine(String.Format("{0}: {1}", errorMessage, ex.Message));
                errorCollection.Enqueue(new Exception(errorMessage, ex));
            }

            System.Threading.Interlocked.Increment(ref solsProcessed);
            WriteDownloadProgress(cam, solsProcessed, imagesProcessed, imagesDownloaded);
        });
    }
}
/// <summary>
/// Downloads every image for <paramref name="cam"/> that is listed on the raw-image
/// site but not yet present in the database.
/// </summary>
/// <remarks>
/// The index page is loaded into <c>basePage</c> only when that member is still null.
/// Within the index, the container whose second child node's text contains
/// <c>cam.CamContainer</c> is selected, and each of its links whose inner HTML starts
/// with "Sol" is processed in parallel. Failures are written to the console and
/// enqueued on <c>errorCollection</c>; they do not stop the remaining work.
/// </remarks>
/// <param name="cam">Camera whose container and images should be processed.</param>
private static void DownloadImages(Cam cam)
{
    // These counters are mutated from Parallel.ForEach worker threads, so every
    // increment goes through Interlocked; a plain ++ can lose updates under contention.
    int solsProcessed = 0;
    int imagesProcessed = 0;
    int imagesDownloaded = 0;

    if (basePage == null)
    {
        Console.Clear();
        Console.WriteLine(String.Format("Attempting to contact {0}", MslCamConstants.RawImageUrl));
        HtmlWeb baseWeb = new HtmlWeb();
        basePage = TryLoadDoc(MslCamConstants.RawImageUrl, baseWeb);
    }

    HtmlNodeCollection baseContainers = basePage.DocumentNode.SelectNodes(@"//div[@class='image_set_container']");
    if (baseContainers == null)
    {
        // SelectNodes yields null (not an empty collection) when nothing matches.
        return;
    }

    foreach (HtmlNode container in baseContainers)
    {
        if (!container.ChildNodes[1].InnerText.Contains(cam.CamContainer))
        {
            continue;
        }

        Console.Clear();
        Console.WriteLine(String.Format("Attempting to download new images for {0}...", cam.CamName));

        HtmlNodeCollection baseSolLinks = container.SelectNodes(@".//a[starts-with(@href,'./?s=')]");
        if (baseSolLinks == null)
        {
            // No sol links in this container; Parallel.ForEach would reject a null source.
            continue;
        }

        Parallel.ForEach(baseSolLinks, solLink =>
        {
            if (!solLink.InnerHtml.StartsWith("Sol"))
            {
                return;
            }

            // The sol number is the second line of the link's inner HTML.
            string[] solTitles = solLink.InnerHtml.Split(new char[] { '\n' });
            int solNumber = int.Parse(solTitles[1]);
            // Drop the leading '.' of the relative href before appending it to the base URL.
            string solUrl = solLink.Attributes["href"].Value.Substring(1);

            try
            {
                HtmlWeb solWeb = new HtmlWeb();
                HtmlDocument solDoc = TryLoadDoc(String.Format("{0}{1}", MslCamConstants.RawImageUrl, solUrl), solWeb);

                List<HtmlNode> solContainers = new List<HtmlNode>();
                HtmlNode solStrong = solDoc.DocumentNode.SelectSingleNode(@"/html[1]/body[1]/div[1]/div[1]/div[1]/div[3]/table[1]/tr[2]/td[1]/div[2]/table[1]/tr[4]");
                if (solStrong != null && solStrong.InnerText.Contains(MslCamConstants.FullDataProductName))
                {
                    // Walk the sibling rows and stop at the first row that mentions a
                    // "Data Product" other than the full data product.
                    foreach (HtmlNode tr in solStrong.ParentNode.ChildNodes)
                    {
                        if (!tr.InnerText.StartsWith(MslCamConstants.FullDataProductName) && tr.InnerText.Contains("Data Product"))
                        {
                            break;
                        }

                        HtmlNodeCollection imgDataDivs = tr.SelectNodes(@".//div[@class='RawImageCaption']");
                        if (imgDataDivs != null)
                        {
                            solContainers.AddRange(imgDataDivs);
                        }
                    }
                }

                Parallel.ForEach(solContainers, solImage =>
                {
                    // Caption text splits into the camera name followed by the timestamp.
                    // A failure here escapes this lambda, is aggregated by Parallel.ForEach
                    // and ends up in the per-sol catch below.
                    string[] imgTitles = solImage.InnerText.Split(new char[] { '\n', '&' }, StringSplitOptions.RemoveEmptyEntries);
                    string imgCamName = imgTitles[0];
                    DateTime imgTimeStamp = DateTime.Parse(imgTitles[1]);

                    if (!imgCamName.Contains(cam.CamName))
                    {
                        return;
                    }

                    try
                    {
                        using (MSLScraperEntities mslContext = new MSLScraperEntities())
                        {
                            bool alreadyStored = mslContext.SolImageData.Any(x => x.Cam == imgCamName && x.Sol == solNumber && x.TimeStamp == imgTimeStamp);
                            if (!alreadyStored)
                            {
                                HtmlNode imgFullLinkNode = solDoc.DocumentNode.SelectSingleNode(solImage.XPath + @"/nobr[1]/div[1]/a[1]");
                                string imgFullLinkUrl = imgFullLinkNode.Attributes["href"].Value;

                                SolImageData newImageData = new SolImageData();
                                newImageData.Sol = solNumber;
                                newImageData.Cam = imgCamName;
                                newImageData.TimeStamp = imgTimeStamp;
                                newImageData.ImageUrl = imgFullLinkUrl;

                                Image bitmap = TryLoadImage(imgFullLinkUrl);
                                if (bitmap != null)
                                {
                                    // using guarantees the image is released even when Save throws.
                                    using (bitmap)
                                    using (MemoryStream stream = new MemoryStream())
                                    {
                                        bitmap.Save(stream, System.Drawing.Imaging.ImageFormat.Bmp);
                                        // ToArray copies the whole buffer regardless of Position.
                                        newImageData.ImageData = stream.ToArray();
                                    }

                                    // Re-check after the download: another worker may have
                                    // stored the same image in the meantime.
                                    bool storedMeanwhile = mslContext.SolImageData.Any(x => x.Cam == newImageData.Cam && x.Sol == newImageData.Sol && x.TimeStamp == newImageData.TimeStamp);
                                    if (!storedMeanwhile)
                                    {
                                        mslContext.SolImageData.Add(newImageData);
                                        mslContext.SaveChanges();
                                        System.Threading.Interlocked.Increment(ref imagesDownloaded);
                                        WriteDownloadProgress(cam, solsProcessed, imagesProcessed, imagesDownloaded);
                                    }
                                }
                            }
                        }
                    }
                    catch (Exception ex)
                    {
                        string errorMessage = String.Format("ERROR while downloading {0} Sol {1} Timestamp {2}", cam.CamName, solNumber, imgTitles[1]);
                        Console.WriteLine(String.Format("{0}: {1}", errorMessage, ex.Message));
                        errorCollection.Enqueue(new Exception(errorMessage, ex));
                    }

                    System.Threading.Interlocked.Increment(ref imagesProcessed);
                    WriteDownloadProgress(cam, solsProcessed, imagesProcessed, imagesDownloaded);
                });
            }
            catch (Exception ex)
            {
                string errorMessage = String.Format("ERROR while downloading {0} Sol {1} images", cam.CamName, solNumber);
                Console.WriteLine(String.Format("{0}: {1}", errorMessage, ex.Message));
                errorCollection.Enqueue(new Exception(errorMessage, ex));
            }

            System.Threading.Interlocked.Increment(ref solsProcessed);
            WriteDownloadProgress(cam, solsProcessed, imagesProcessed, imagesDownloaded);
        });
    }
}