Example #1
0
 /// <summary>
 /// Relays an image to the subscribers of <c>updateImageEvent</c>.
 /// Presumably wired up as the handler for the crawler process information's
 /// "new image" notification (inferred from the method name; the subscription
 /// itself is not visible here).
 /// </summary>
 /// <param name="crawlerImage">Image to forward; a null image is ignored.</param>
 private void crawlerProcessInformation_crawlerProcessInformationNewImageEvent(CrawlerImage crawlerImage)
 {
     // Nothing to relay without an image, and nobody to relay to without subscribers.
     if (crawlerImage == null || this.updateImageEvent == null)
     {
         return;
     }

     this.updateImageEvent(crawlerImage);
 }
        /// <summary>
        /// Requests the URL of the page held by the crawler job context, stores the
        /// downloaded HTML in the page or publishes the downloaded image, and returns
        /// the job parts that should run next.
        /// </summary>
        /// <remarks>
        /// Progress is reported through <c>progressPercentageChangeEvent</c>: 0.0 at the
        /// start, intermediate values while a response body is read, and 1.0 at the end
        /// (also when the request failed or no URL was available).
        /// Any exception raised while requesting or processing the response is swallowed;
        /// the page is flagged with <c>requestFailure = true</c> instead.
        /// </remarks>
        /// <returns>
        /// A list containing a <c>CrawlerJobPartAnalyzePage</c> when the page has non-empty
        /// content, otherwise a <c>CrawlerJobPartGenerateSubJobs</c> when the page has a
        /// redirect URL list, otherwise an empty list. Never null.
        /// </returns>
        public List<ICrawlerJobPart> run()
        {
            // Pause between the first and the second ("dummy page") request.
            const int dummyRequestRetryDelayMilliseconds = 5000;

            List<ICrawlerJobPart> crawlerJobPartList = new List<ICrawlerJobPart>();

            // Signal the start of the job part.
            if (this.progressPercentageChangeEvent != null)
            {
                this.progressPercentageChangeEvent(0.0);
            }

            if (this.crawlerJobContext != null && this.crawlerJobContext.page != null && this.crawlerJobContext.page.urlStr != null)
            {
                try
                {
                    this.requestPageAndAnalyzeRedirect();

                    // Second request if the filter settings want to avoid dummy pages.
                    if (this.crawlerJobContext.crawlerImageFilter != null && this.crawlerJobContext.crawlerImageFilter.filterDummyRequest && !this.isNotRedirect)
                    {
                        // The first response is about to be replaced; release its connection
                        // now, otherwise it would never be closed.
                        if (this.webResponse != null)
                        {
                            this.webResponse.Close();
                        }

                        Thread.Sleep(dummyRequestRetryDelayMilliseconds);

                        this.requestPageAndAnalyzeRedirect();
                    }

                    // Process the response stream.
                    if (this.isNotRedirect && this.webResponse != null)
                    {
                        String contentType = this.webResponse.ContentType;

                        // Media type names are case-insensitive, so match them ordinally and
                        // without regard to case (the parameterless IndexOf is culture-sensitive).
                        Boolean isHtmlPage = contentType == null || contentType.IndexOf("html", StringComparison.OrdinalIgnoreCase) >= 0;
                        Boolean isImage = contentType != null && contentType.IndexOf("image", StringComparison.OrdinalIgnoreCase) >= 0;

                        if (isHtmlPage)
                        {
                            this.downloadPageContentFromResponse();
                        }
                        else if (isImage && (this.crawlerJobContext.imageBacklog == null || !this.crawlerJobContext.imageBacklog.containsImageSrcUrlStr(this.crawlerJobContext.page.urlStr)))
                        {
                            this.downloadImageFromResponse(contentType);
                        }
                    }

                    // Decide which job parts follow.
                    if (this.crawlerJobContext.page.pageContent != null && this.crawlerJobContext.page.pageContent.Length > 0)
                    {
                        crawlerJobPartList.Add(new CrawlerJobPartAnalyzePage());
                    }
                    else if (this.crawlerJobContext.page.redirectUrlStrList != null)
                    {
                        crawlerJobPartList.Add(new CrawlerJobPartGenerateSubJobs());
                    }
                }
                catch (Exception)
                {
                    // Deliberate best effort: a failed request must not abort the crawl,
                    // it is only recorded on the page.
                    if (this.crawlerJobContext.page != null)
                    {
                        this.crawlerJobContext.page.requestFailure = true;
                    }
                }
                finally
                {
                    if (this.webResponse != null)
                    {
                        this.webResponse.Close();
                    }
                }
            }

            // Signal completion.
            if (this.progressPercentageChangeEvent != null)
            {
                this.progressPercentageChangeEvent(1.0);
            }

            return crawlerJobPartList;
        }

        // Issues a request for the page URL (auto-redirect enabled, at most 5 redirections),
        // stores request and response in the instance fields and runs the redirect analysis.
        private void requestPageAndAnalyzeRedirect()
        {
            this.webRequest = (HttpWebRequest)HttpWebRequest.Create(this.crawlerJobContext.page.urlStr);
            this.webRequest.AllowAutoRedirect = true;
            this.webRequest.AllowWriteStreamBuffering = true;
            this.webRequest.MaximumAutomaticRedirections = 5;

            this.webResponse = (HttpWebResponse)this.webRequest.GetResponse();

            // Presumably updates isNotRedirect and the page's redirect list; defined elsewhere.
            this.analyzeAndSetRedirectInformationForPage();
        }

        // Reports download progress to progressPercentageChangeEvent and then raises
        // crawlerJobPartSuspendEvent. With a known content size the value is read/size;
        // with an unknown size it is 1 - 1/read, which approaches 1.0 without reaching it.
        private void notifyTransferProgress(double transferredBytes, long estimatedContentSize)
        {
            if (this.progressPercentageChangeEvent != null)
            {
                double progressPercentage = estimatedContentSize > 0
                    ? transferredBytes / estimatedContentSize
                    : 1.0 - 1.0 / Math.Max(transferredBytes, 1.0);
                this.progressPercentageChangeEvent(progressPercentage);
            }

            if (this.crawlerJobPartSuspendEvent != null)
            {
                this.crawlerJobPartSuspendEvent();
            }
        }

        // Copies the response body into memory and stores it as text in page.pageContent.
        private void downloadPageContentFromResponse()
        {
            using (MemoryStream memoryStream = new MemoryStream())
            {
                // The using block closes the response stream; no explicit Close() is needed.
                using (Stream webResponseInputStream = this.webResponse.GetResponseStream())
                {
                    long estimatedContentSize = this.webResponse.ContentLength;
                    StreamConnector.connect(webResponseInputStream, memoryStream, 8000, true, delegate(long currentlyReadBytes, out Boolean doAbort)
                    {
                        this.notifyTransferProgress(currentlyReadBytes, estimatedContentSize);
                        doAbort = this.isAborting;
                    });
                }

                // Rewind so the reader sees the copied body from its beginning.
                memoryStream.Position = 0;
                using (StreamReader reader = new StreamReader(memoryStream, true))
                {
                    this.crawlerJobContext.page.pageContent = reader.ReadToEnd();
                }
            }
        }

        // Decodes the response body as an image, applies the optional image filter and
        // raises crawlerJobPartImageRetrievedEvent for an accepted image. A missing
        // filter accepts every image; a missing backlog never counts as a duplicate.
        private void downloadImageFromResponse(String contentType)
        {
            Boolean passesFilesizeFilter = this.crawlerJobContext.crawlerImageFilter == null || (
                (this.webResponse.ContentLength < 0 || this.webResponse.ContentLength > this.crawlerJobContext.crawlerImageFilter.minimumFilesize)
                && (!this.crawlerJobContext.crawlerImageFilter.filterImagesWithSameSize
                    || this.crawlerJobContext.imageBacklog == null
                    || !this.crawlerJobContext.imageBacklog.containsImageFilesize(this.webResponse.ContentLength))
                );
            if (!passesFilesizeFilter)
            {
                return;
            }

            // Only plain "type/subtype" content types are accepted.
            String[] contentTypeTokens = contentType.Split('/');
            if (contentTypeTokens.Length != 2)
            {
                return;
            }

            CrawlerImage crawlerImage = new CrawlerImage();
            crawlerImage.type = contentTypeTokens[1];

            using (Stream webResponseInputStream = this.webResponse.GetResponseStream())
            {
                ImageCrawler.Util.StreamConnector.ProgressReportStream progressReportStream = new ImageCrawler.Util.StreamConnector.ProgressReportStream(webResponseInputStream);
                long estimatedContentSize = this.webResponse.ContentLength;
                progressReportStream.progressUpdateEvent += new EventHandler<StreamConnector.ProgressReportStream.EventArgs>(delegate(object sender, StreamConnector.ProgressReportStream.EventArgs args)
                {
                    this.notifyTransferProgress(args.readBytesCounter, estimatedContentSize);
                });

                // NOTE(review): Image.FromStream expects the stream to stay open for the
                // lifetime of the image, but the stream is closed when this using block
                // ends. Subscribers that keep the image beyond the event should copy it;
                // confirm against the event handlers.
                Image image = Image.FromStream(progressReportStream);
                Boolean imageHandedOver = false;
                try
                {
                    // Dimension limits only apply when a filter is configured.
                    Boolean passesDimensionFilter = this.crawlerJobContext.crawlerImageFilter == null || (
                        image.Width > this.crawlerJobContext.crawlerImageFilter.minimumWidth
                        && image.Height > this.crawlerJobContext.crawlerImageFilter.minimumHeight
                        && image.Width * image.Height > this.crawlerJobContext.crawlerImageFilter.minimumArea);

                    if (passesDimensionFilter)
                    {
                        crawlerImage.image = image;
                        crawlerImage.srcUrlStr = this.crawlerJobContext.page.urlStr;
                        crawlerImage.pageLinkUrlStr = this.crawlerJobContext.page.urlStr;

                        if (this.crawlerJobPartImageRetrievedEvent != null && !this.isAborting)
                        {
                            // Ownership passes to the subscribers before they are invoked.
                            imageHandedOver = true;
                            this.crawlerJobPartImageRetrievedEvent(crawlerImage);
                        }
                    }
                }
                finally
                {
                    // An image nobody received is unreachable from here on; release it.
                    if (!imageHandedOver)
                    {
                        image.Dispose();
                    }
                }
            }
        }