コード例 #1
0
ファイル: Program.cs プロジェクト: Bombe99/MSL.Scraper
        /// <summary>
        /// Walks the raw-image index page, finds the section belonging to <paramref name="cam"/>,
        /// and saves every image of that camera that is not already stored through
        /// <c>MSLScraperEntities</c>.
        /// </summary>
        /// <remarks>
        /// Sol pages, and the image captions inside each Sol page, are processed with
        /// <c>Parallel.ForEach</c>. Failures are written to the console and enqueued on
        /// <c>errorCollection</c> rather than rethrown. Images are re-encoded as BMP before
        /// they are stored.
        /// </remarks>
        /// <param name="cam">
        /// Camera to download: <c>CamContainer</c> selects the index section and
        /// <c>CamName</c> filters the image captions.
        /// </param>
        private static void DownloadImages(Cam cam)
        {
            // Shared by every worker below; only ever incremented through Interlocked so
            // that concurrent updates are not lost.
            int solsProcessed = 0;
            int imagesProcessed = 0;
            int imagesDownloaded = 0;

            if (basePage == null)
            {
                Console.Clear();
                Console.WriteLine(String.Format("Attempting to contact {0}", MslCamConstants.RawImageUrl));

                HtmlWeb baseWeb = new HtmlWeb();
                basePage = TryLoadDoc(MslCamConstants.RawImageUrl, baseWeb);
            }

            HtmlNodeCollection baseContainers = basePage.DocumentNode.SelectNodes(@"//div[@class='image_set_container']");

            // SelectNodes hands back null (not an empty collection) when nothing matches.
            if (baseContainers == null)
                return;

            foreach (HtmlNode container in baseContainers)
            {
                if (!container.ChildNodes[1].InnerText.Contains(cam.CamContainer))
                    continue;

                Console.Clear();
                Console.WriteLine(String.Format("Attempting to download new images for {0}...", cam.CamName));

                HtmlNodeCollection baseSolLinks = container.SelectNodes(@".//a[starts-with(@href,'./?s=')]");
                if (baseSolLinks == null)
                    continue;

                Parallel.ForEach(baseSolLinks, solLink =>
                {
                    if (!solLink.InnerHtml.StartsWith("Sol"))
                        return;

                    string[] solTitles = solLink.InnerHtml.Split(new char[] { '\n' });
                    int solNumber;
                    if (solTitles.Length < 2 || !int.TryParse(solTitles[1], out solNumber))
                    {
                        // A malformed link is recorded like any other failure instead of
                        // tearing down the whole parallel loop.
                        string parseError = String.Format("ERROR while reading Sol number for {0} from link '{1}'", cam.CamName, solLink.InnerHtml);
                        Console.WriteLine(parseError);
                        errorCollection.Enqueue(new Exception(parseError));
                        return;
                    }

                    // Drop the leading '.' of the relative "./?s=" href.
                    string solUrl = solLink.Attributes["href"].Value.Substring(1);

                    try
                    {
                        HtmlWeb solWeb = new HtmlWeb();
                        HtmlDocument solDoc = TryLoadDoc(String.Format("{0}{1}", MslCamConstants.RawImageUrl, solUrl), solWeb);

                        List<HtmlNode> solContainers = new List<HtmlNode>();

                        HtmlNode solStrong = solDoc.DocumentNode.SelectSingleNode(@"/html[1]/body[1]/div[1]/div[1]/div[1]/div[3]/table[1]/tr[2]/td[1]/div[2]/table[1]/tr[4]");
                        if (solStrong != null && solStrong.InnerText.Contains(MslCamConstants.FullDataProductName))
                        {
                            foreach (HtmlNode tr in solStrong.ParentNode.ChildNodes)
                            {
                                // Stop at the first row that announces a different data product.
                                if (!tr.InnerText.StartsWith(MslCamConstants.FullDataProductName) && tr.InnerText.Contains("Data Product"))
                                    break;

                                HtmlNodeCollection imgDataDivs = tr.SelectNodes(@".//div[@class='RawImageCaption']");
                                if (imgDataDivs != null)
                                    solContainers.AddRange(imgDataDivs);
                            }
                        }

                        Parallel.ForEach(solContainers, solImage =>
                        {
                            string[] imgTitles = solImage.InnerText.Split(new char[] { '\n', '&' }, StringSplitOptions.RemoveEmptyEntries);
                            string imgCamName = imgTitles[0];
                            DateTime imgTimeStamp = DateTime.Parse(imgTitles[1]);
                            if (!imgCamName.Contains(cam.CamName))
                                return;

                            try
                            {
                                using (MSLScraperEntities mslContext = new MSLScraperEntities())
                                {
                                    if (!mslContext.SolImageData.Any(x => x.Cam == imgCamName && x.Sol == solNumber && x.TimeStamp == imgTimeStamp))
                                    {
                                        HtmlNode imgFullLinkNode = solDoc.DocumentNode.SelectSingleNode(solImage.XPath + @"/nobr[1]/div[1]/a[1]");
                                        string imgFullLinkUrl = imgFullLinkNode.Attributes["href"].Value;

                                        SolImageData newImageData = new SolImageData();
                                        newImageData.Sol = solNumber;
                                        newImageData.Cam = imgCamName;
                                        newImageData.TimeStamp = imgTimeStamp;
                                        newImageData.ImageUrl = imgFullLinkUrl;

                                        // The using blocks release the bitmap and the stream even when Save throws.
                                        byte[] imageBytes = null;
                                        using (Image bitmap = TryLoadImage(imgFullLinkUrl))
                                        {
                                            if (bitmap != null)
                                            {
                                                using (MemoryStream stream = new MemoryStream())
                                                {
                                                    bitmap.Save(stream, System.Drawing.Imaging.ImageFormat.Bmp);
                                                    imageBytes = stream.ToArray();
                                                }
                                            }
                                        }

                                        if (imageBytes != null)
                                        {
                                            newImageData.ImageData = imageBytes;

                                            // Checked again because the download above takes time.
                                            if (!mslContext.SolImageData.Any(x => x.Cam == newImageData.Cam && x.Sol == newImageData.Sol && x.TimeStamp == newImageData.TimeStamp))
                                            {
                                                mslContext.SolImageData.Add(newImageData);
                                                mslContext.SaveChanges();

                                                int downloadedNow = System.Threading.Interlocked.Increment(ref imagesDownloaded);
                                                WriteDownloadProgress(cam, solsProcessed, imagesProcessed, downloadedNow);
                                            }
                                        }
                                    }
                                }
                            }
                            catch (Exception ex)
                            {
                                string errorMessage = String.Format("ERROR while downloading {0} Sol {1} Timestamp {2}", cam.CamName, solNumber, imgTitles[1]);
                                Console.WriteLine(String.Format("{0}: {1}", errorMessage, ex.Message));
                                errorCollection.Enqueue(new Exception(errorMessage, ex));
                            }

                            int processedNow = System.Threading.Interlocked.Increment(ref imagesProcessed);
                            WriteDownloadProgress(cam, solsProcessed, processedNow, imagesDownloaded);
                        });
                    }
                    catch (Exception ex)
                    {
                        string errorMessage = String.Format("ERROR while downloading {0} Sol {1} images", cam.CamName, solNumber);
                        Console.WriteLine(String.Format("{0}: {1}", errorMessage, ex.Message));
                        errorCollection.Enqueue(new Exception(errorMessage, ex));
                    }

                    int solsNow = System.Threading.Interlocked.Increment(ref solsProcessed);
                    WriteDownloadProgress(cam, solsNow, imagesProcessed, imagesDownloaded);
                });
            }
        }
コード例 #2
0
        /// <summary>
        /// Downloads the images of one camera: locates the camera's section on the raw-image
        /// index page, visits each linked Sol page, and stores every image that is not yet
        /// present in the <c>SolImageData</c> set.
        /// </summary>
        /// <remarks>
        /// Both the Sol links and the image captions of a Sol page are handled by
        /// <c>Parallel.ForEach</c>. Errors are printed and enqueued on
        /// <c>errorCollection</c>; they are not rethrown. Stored image bytes are the BMP
        /// encoding of the downloaded picture.
        /// </remarks>
        /// <param name="cam">
        /// Camera descriptor; <c>CamContainer</c> is matched against the index section text
        /// and <c>CamName</c> against each image caption.
        /// </param>
        private static void DownloadImages(Cam cam)
        {
            // Progress counters are touched from several worker threads, so every
            // increment goes through Interlocked to avoid lost updates.
            int solsProcessed    = 0;
            int imagesProcessed  = 0;
            int imagesDownloaded = 0;

            if (basePage == null)
            {
                Console.Clear();
                Console.WriteLine(String.Format("Attempting to contact {0}", MslCamConstants.RawImageUrl));

                HtmlWeb baseWeb = new HtmlWeb();
                basePage = TryLoadDoc(MslCamConstants.RawImageUrl, baseWeb);
            }

            HtmlNodeCollection baseContainers = basePage.DocumentNode.SelectNodes(@"//div[@class='image_set_container']");

            // A query without matches produces null instead of an empty collection.
            if (baseContainers == null)
            {
                return;
            }

            foreach (HtmlNode container in baseContainers)
            {
                if (!container.ChildNodes[1].InnerText.Contains(cam.CamContainer))
                {
                    continue;
                }

                Console.Clear();
                Console.WriteLine(String.Format("Attempting to download new images for {0}...", cam.CamName));

                HtmlNodeCollection baseSolLinks = container.SelectNodes(@".//a[starts-with(@href,'./?s=')]");
                if (baseSolLinks == null)
                {
                    continue;
                }

                Parallel.ForEach(baseSolLinks, solLink =>
                {
                    if (!solLink.InnerHtml.StartsWith("Sol"))
                    {
                        return;
                    }

                    string[] solTitles = solLink.InnerHtml.Split(new char[] { '\n' });
                    int solNumber;
                    if (solTitles.Length < 2 || !int.TryParse(solTitles[1], out solNumber))
                    {
                        // Report an unreadable Sol link the same way other failures are
                        // reported, so it cannot abort the surrounding parallel loop.
                        string parseError = String.Format("ERROR while reading Sol number for {0} from link '{1}'", cam.CamName, solLink.InnerHtml);
                        Console.WriteLine(parseError);
                        errorCollection.Enqueue(new Exception(parseError));
                        return;
                    }

                    // The href is relative ("./?s=..."); strip the leading dot.
                    string solUrl = solLink.Attributes["href"].Value.Substring(1);

                    try
                    {
                        HtmlWeb solWeb      = new HtmlWeb();
                        HtmlDocument solDoc = TryLoadDoc(String.Format("{0}{1}", MslCamConstants.RawImageUrl, solUrl), solWeb);

                        List <HtmlNode> solContainers = new List <HtmlNode>();

                        HtmlNode solStrong = solDoc.DocumentNode.SelectSingleNode(@"/html[1]/body[1]/div[1]/div[1]/div[1]/div[3]/table[1]/tr[2]/td[1]/div[2]/table[1]/tr[4]");
                        if (solStrong != null && solStrong.InnerText.Contains(MslCamConstants.FullDataProductName))
                        {
                            foreach (HtmlNode tr in solStrong.ParentNode.ChildNodes)
                            {
                                // Rows are collected until another data product's heading appears.
                                if (!tr.InnerText.StartsWith(MslCamConstants.FullDataProductName) && tr.InnerText.Contains("Data Product"))
                                {
                                    break;
                                }

                                HtmlNodeCollection imgDataDivs = tr.SelectNodes(@".//div[@class='RawImageCaption']");
                                if (imgDataDivs != null)
                                {
                                    solContainers.AddRange(imgDataDivs);
                                }
                            }
                        }

                        Parallel.ForEach(solContainers, solImage =>
                        {
                            string[] imgTitles    = solImage.InnerText.Split(new char[] { '\n', '&' }, StringSplitOptions.RemoveEmptyEntries);
                            string imgCamName     = imgTitles[0];
                            DateTime imgTimeStamp = DateTime.Parse(imgTitles[1]);
                            if (!imgCamName.Contains(cam.CamName))
                            {
                                return;
                            }

                            try
                            {
                                using (MSLScraperEntities mslContext = new MSLScraperEntities())
                                {
                                    if (!mslContext.SolImageData.Any(x => x.Cam == imgCamName && x.Sol == solNumber && x.TimeStamp == imgTimeStamp))
                                    {
                                        HtmlNode imgFullLinkNode = solDoc.DocumentNode.SelectSingleNode(solImage.XPath + @"/nobr[1]/div[1]/a[1]");
                                        string imgFullLinkUrl    = imgFullLinkNode.Attributes["href"].Value;

                                        SolImageData newImageData = new SolImageData();
                                        newImageData.Sol          = solNumber;
                                        newImageData.Cam          = imgCamName;
                                        newImageData.TimeStamp    = imgTimeStamp;
                                        newImageData.ImageUrl     = imgFullLinkUrl;

                                        // Disposal is tied to the using blocks so a failing Save cannot leak the bitmap.
                                        byte[] imageBytes = null;
                                        using (Image bitmap = TryLoadImage(imgFullLinkUrl))
                                        {
                                            if (bitmap != null)
                                            {
                                                using (MemoryStream stream = new MemoryStream())
                                                {
                                                    bitmap.Save(stream, System.Drawing.Imaging.ImageFormat.Bmp);
                                                    imageBytes = stream.ToArray();
                                                }
                                            }
                                        }

                                        if (imageBytes != null)
                                        {
                                            newImageData.ImageData = imageBytes;

                                            // Second existence check: the row may have appeared while the image was downloading.
                                            if (!mslContext.SolImageData.Any(x => x.Cam == newImageData.Cam && x.Sol == newImageData.Sol && x.TimeStamp == newImageData.TimeStamp))
                                            {
                                                mslContext.SolImageData.Add(newImageData);
                                                mslContext.SaveChanges();

                                                int downloadedNow = System.Threading.Interlocked.Increment(ref imagesDownloaded);
                                                WriteDownloadProgress(cam, solsProcessed, imagesProcessed, downloadedNow);
                                            }
                                        }
                                    }
                                }
                            }
                            catch (Exception ex)
                            {
                                string errorMessage = String.Format("ERROR while downloading {0} Sol {1} Timestamp {2}", cam.CamName, solNumber, imgTitles[1]);
                                Console.WriteLine(String.Format("{0}: {1}", errorMessage, ex.Message));
                                errorCollection.Enqueue(new Exception(errorMessage, ex));
                            }

                            int processedNow = System.Threading.Interlocked.Increment(ref imagesProcessed);
                            WriteDownloadProgress(cam, solsProcessed, processedNow, imagesDownloaded);
                        });
                    }
                    catch (Exception ex)
                    {
                        string errorMessage = String.Format("ERROR while downloading {0} Sol {1} images", cam.CamName, solNumber);
                        Console.WriteLine(String.Format("{0}: {1}", errorMessage, ex.Message));
                        errorCollection.Enqueue(new Exception(errorMessage, ex));
                    }

                    int solsNow = System.Threading.Interlocked.Increment(ref solsProcessed);
                    WriteDownloadProgress(cam, solsNow, imagesProcessed, imagesDownloaded);
                });
            }
        }