Ejemplo n.º 1
0
        /// <summary>
        /// Removes the privacy label (if it is still shown) and fills the flow
        /// layout with one web-view widget per displayed video. When ads are
        /// enabled, a horizontal ad widget follows the widgets at index 0, 4, 8, ...
        /// </summary>
        /// <param name="page">Scraped page whose videos are rendered.</param>
        protected override void OnAddItems(ScrapedPage page)
        {
            var panelControls = this.kryptonPanel1.Controls;
            if (panelControls.Contains(this.kryptonLabelPrivacy))
            {
                panelControls.Remove(this.kryptonLabelPrivacy);
            }

            for (var index = 0; index < Constants.GetMaxDisplayCount(page.Videos.Count); index++)
            {
                var video = page.Videos[index];

                // Fill the HTML template placeholders with this video's data.
                var html = Properties.Resources.TestHtml
                           .Replace("{0}", video.Url)
                           .Replace("{1}", video.ImageUrl)
                           .Replace("{2}", video.Title)
                           .Replace("{3}", video.Duration.ToString());

                var widget = new WebViewWidget(video, html);
                this.flowLayoutPanel.Controls.Add(widget);

                // Forward the widget's selection event to this gallery's handler.
                widget.ViewSelected += (sender, e) => OnItemSelected(sender, e);

                if (Constants.ShowAds && index % 4 == 0)
                {
                    var adWidget = CreateAdWidget(Constants.HorizontalAdId);
                    this.flowLayoutPanel.Controls.Add(adWidget);
                }
            }
        }
Ejemplo n.º 2
0
 /// <summary>
 /// Remembers the given page as the last page, shows the "more" header
 /// button only when a next page is available, and hands the page to the
 /// gallery widget.
 /// </summary>
 /// <param name="page">Scraped page to display.</param>
 public void Initialize(ScrapedPage page)
 {
     LogManager.Log("Initializing view window");
     this.lastPage = page;

     bool hasNextPage = GetNextPage() != null;
     this.buttonSpecHeaderGroupMore.Visible = hasNextPage;

     this.verticalSingleColumnGalleryWidget1.AddItems(page);
 }
        /// <summary>
        /// Gets the ip address by scraping <c>m_url</c> with a GET request.
        /// </summary>
        /// <returns>
        /// When <c>m_successRegex</c> is null or empty: the trimmed response body,
        /// provided <c>StringUtilities.ExtractFirstNumber</c> reports a value
        /// greater than zero for it. Otherwise: whatever
        /// <c>RegexUtilities.Extract</c> yields for <c>m_successRegex</c> and
        /// <c>m_captureIndex</c>. Returns <c>null</c> when the scrape produced no
        /// content, the body failed the number check, or an exception occurred.
        /// </returns>
        public override string GetIpAddress()
        {
            try
            {
                var scraper = new Scraper();
                PrepareScraper(scraper);

                ScrapedPage checkip = scraper.Scrape(ScrapeType.GET, m_url);
                if (checkip == null || checkip.RawStream == null)
                {
                    return null;
                }

                if (!string.IsNullOrEmpty(m_successRegex))
                {
                    return RegexUtilities.Extract(checkip.RawStream, m_successRegex, m_captureIndex);
                }

                // No regex configured: accept the raw body only if it contains a positive number.
                string result = checkip.RawStream.Trim();
                return StringUtilities.ExtractFirstNumber(result) > 0 ? result : null;
            }
            catch (Exception ex)
            {
                // The lookup stays best-effort (callers still get null), but the
                // failure is traced instead of being silently discarded.
                System.Diagnostics.Trace.TraceWarning("GetIpAddress failed: {0}", ex);
                return null;
            }
        }
Ejemplo n.º 4
0
 /// <summary>
 /// Scrape-completed callback: marks the program as idle and then runs
 /// <c>Initialize</c> with the scraped page through <c>InvokeEx</c>.
 /// </summary>
 /// <param name="page">The page produced by the scrape.</param>
 public void OnScrapVideoCompleted(ScrapedPage page)
 {
     Program.SetIdle();
     this.InvokeEx(() => Initialize(page));
 }
Ejemplo n.º 5
0
 /// <summary>
 /// Scrape-completed callback: logs the number of videos found, then (via
 /// <c>InvokeEx</c>) disposes the loading label and initializes both the
 /// viewer and the browse windows with the page.
 /// </summary>
 /// <param name="page">The page produced by the scrape.</param>
 public void OnScrapVideoCompleted(ScrapedPage page)
 {
     var videoCount = page.Videos.Count;
     LogManager.Log("Download complete. Total videos: " + videoCount);

     this.InvokeEx(() =>
     {
         // The loading indicator is no longer needed once results are shown.
         this.labelLoading.Dispose();

         this.viewerWindow.Initialize(page);
         this.browseWindow.Initialize(page);
     });
 }
Ejemplo n.º 6
0
 /// <summary>
 /// Adds each displayed video through <c>AddItem</c>. When ads are enabled,
 /// a vertical ad widget is appended to the table layout after the videos
 /// at index 0, 4, 8, ...
 /// </summary>
 /// <param name="page">Scraped page whose videos are added.</param>
 protected override void OnAddItems(ScrapedPage page)
 {
     var index = 0;
     while (index < Constants.GetMaxDisplayCount(page.Videos.Count))
     {
         var current = page.Videos[index];
         AddItem(current);

         if (Constants.ShowAds && index % 4 == 0)
         {
             var adWidget = CreateAdWidget(Constants.VerticalAdId);
             this.tableLayoutPanel.Controls.Add(adWidget);
         }

         index++;
     }
 }
Ejemplo n.º 7
0
        /// <summary>
        /// Renders the index view for the page named by the <c>url</c>
        /// query-string parameter.
        /// </summary>
        /// <returns>
        /// The view with the result of <c>GetPage</c> as its model, or with a
        /// null model when no <c>url</c> parameter was supplied.
        /// </returns>
        public ActionResult Index()
        {
            string url = Request.QueryString["url"];

            // Keep the model in a typed local so View(object model) is chosen
            // even when the model is null.
            ScrapedPage page = string.IsNullOrEmpty(url) ? null : GetPage(url);

            return View(page);
        }
        /// <summary>
        /// Scrapes the requested pages (or every known page when no request body
        /// is given), records the new fan count on each page's metadata, and
        /// stores a history entry describing this scrape run.
        /// </summary>
        /// <param name="request">Ids of the pages to scrape; null scrapes all pages.</param>
        /// <returns>The result of saving the new <c>PageScrapeHistory</c>.</returns>
        public PageScrapeHistory ScrapePages([FromBody] IEnumerable <string> request)
        {
            PageMetadata[] targets;
            if (request != null)
            {
                targets = request.Select(id => PageMetadataRepository.Get(id)).ToArray();
            }
            else
            {
                // No explicit selection: scrape every page in the repository.
                targets = PageMetadataRepository.All().Data.ToArray();
            }

            DateTime startedAt = DateTime.Now;
            ScrapedPage[] scraped = PageScraper.Scrape(targets, startedAt).ToArray();

            // Update the per-page metadata with the fresh fan counts.
            foreach (PageMetadata metadata in targets)
            {
                ScrapedPage match = scraped.First(p => p.FacebookId == metadata.FacebookId);

                var latestCount = new DatedFanCount
                {
                    Date     = match.Date,
                    FanCount = match.FanCount,
                };
                metadata.FanCountHistory.Insert(0, latestCount);
                metadata.LatestScrape = startedAt;
                PageMetadataRepository.Save(metadata, Refresh.False);

                // After saving, keep only the newest entry on the in-memory
                // object, which is what the history record below references.
                metadata.FanCountHistory = metadata.FanCountHistory.Take(1).ToList();
            }

            // Record the scrape run as a whole.
            var history = new PageScrapeHistory
            {
                Id          = Guid.NewGuid().ToString(),
                ImportStart = startedAt,
                ImportEnd   = DateTime.Now,
                Pages       = targets
            };

            return PageScrapeHistoryRepository.Save(history);
        }
Ejemplo n.º 9
0
        /// <summary>
        /// Loads the HTML document at <paramref name="url"/> and builds a
        /// <c>ScrapedPage</c> from it: host, title, images, links, keywords and
        /// description. If any images were found they are replaced by the
        /// result of <c>ScoreImages</c>.
        /// </summary>
        /// <param name="url">Address of the page to load.</param>
        /// <returns>The populated page.</returns>
        private ScrapedPage GetPage(string url)
        {
            var web      = new HtmlWeb();
            var document = web.Load(url);

            var result = new ScrapedPage
            {
                URL         = url,
                Host        = web.ResponseUri.Host,
                Title       = GetTitle(document),
                Images      = GetImages(document),
                Links       = GetLinks(document),
                Keywords    = GetKeywords(document),
                Description = GetDescription(document)
            };

            bool hasImages = result.Images.Count > 0;
            if (hasImages)
            {
                result.Images = ScoreImages(result);
            }

            return result;
        }
Ejemplo n.º 10
0
        /// <summary>
        /// Fills the flow layout with one web-view widget per displayed video
        /// while layout is suspended, then re-runs the resize handler, adds the
        /// links and calls the base implementation.
        /// </summary>
        /// <param name="page">Scraped page whose videos are rendered.</param>
        protected override void OnAddItems(ScrapedPage page)
        {
            this.flowLayoutPanel.SuspendLayout();
            try
            {
                // The video list is not modified in this loop, so the bound is computed once.
                var displayCount = Constants.GetMaxDisplayCount(page.Videos.Count);

                for (var i = 0; i < displayCount; i++)
                {
                    var video = page.Videos[i];

                    // Fill the HTML template placeholders with this video's data.
                    var html = Properties.Resources.TestHtml
                               .Replace("{0}", video.Url)
                               .Replace("{1}", video.ImageUrl)
                               .Replace("{2}", video.Title)
                               .Replace("{3}", video.Duration.ToString());

                    var widget = new WebViewWidget(video, html);
                    this.flowLayoutPanel.Controls.Add(widget);
                    widget.ViewSelected += (sender, e) => OnItemSelected(sender, e);
                }
            }
            finally
            {
                // Always resume, otherwise an exception while building widgets
                // would leave the panel with layout permanently suspended.
                this.flowLayoutPanel.ResumeLayout();
            }

            flowLayoutPanel_Resize(this, null);
            AddLinks();

            base.OnAddItems(page);
        }
Ejemplo n.º 11
0
        /// <summary>
        /// Fills in the derived metadata of a scraped post: its geo point
        /// ("latitude,longitude" or null when the post has no place location),
        /// the scraped page closest to the post's creation time, and the
        /// scrape timestamps.
        /// </summary>
        /// <param name="post">Post to update in place.</param>
        /// <param name="pageName">Name used to look up the owning scraped page.</param>
        public void UpdateMetadata(ScrapedPost post, string pageName)
        {
            Location location = post.Place?.Location;

            // Format with the invariant culture: the value is a machine-readable
            // "lat,lon" pair, and a culture that uses ',' as the decimal separator
            // would otherwise make it ambiguous.
            post.GeoPoint = location != null
                ? string.Format(
                      System.Globalization.CultureInfo.InvariantCulture,
                      "{0},{1}",
                      location.Latitude,
                      location.Longitude)
                : null;

            ScrapedPage scrapedPage = PageScraper.Closest(p => p.Name, pageName, post.CreatedTime);
            post.Page = scrapedPage;

            post.LastScraped = DateTime.Now;

            // First time this post is seen: the initial scrape time equals the latest one.
            if (post.Scraped == DateTime.MinValue)
            {
                post.Scraped = post.LastScraped;
            }
        }
Ejemplo n.º 12
0
        /// <summary>
        /// Activity that wraps one page of scraped HTML in a <c>ScrapedPage</c>,
        /// serializes it to JSON and adds it to the Service Bus collector.
        /// </summary>
        /// <param name="ctx">
        /// Activity context whose input is a tuple of
        /// (html, suburb, total page count, page number).
        /// </param>
        /// <param name="collector">Collector bound to the <c>%ServiceBusQueue%</c> queue.</param>
        /// <param name="log">Function trace writer.</param>
        public static async Task StorePageContent(
            [ActivityTrigger] DurableActivityContext ctx,
            [ServiceBus("%ServiceBusQueue%")] IAsyncCollector <string> collector,
            TraceWriter log)
        {
            var arguments = ctx.GetInput <Tuple <string, string, int, int> >();

            var page = new ScrapedPage
            {
                html   = arguments.Item1,
                suburb = arguments.Item2,
                total  = arguments.Item3,
                number = arguments.Item4,
            };

            var json = JsonConvert.SerializeObject(page);
            await collector.AddAsync(json);

            // Message reads "page N of M" (the original said "or").
            log.Warning($"Page content stored {page.number} of {page.total}");
        }
Ejemplo n.º 13
0
 /// <summary>
 /// Resets the browse gallery and repopulates it from the given page.
 /// </summary>
 /// <param name="page">Scraped page to show in the gallery.</param>
 public void Initialize(ScrapedPage page)
 {
     var gallery = this.browseGalleryWidget;

     gallery.ClearItems();
     gallery.AddItems(page);
 }
Ejemplo n.º 14
0
 /// <summary>
 /// Stores the page as the current page and then calls
 /// <c>OnAddItems</c> with it.
 /// </summary>
 /// <param name="page">Scraped page whose items are added.</param>
 public void AddItems(ScrapedPage page)
 {
     currentPage = page;
     this.OnAddItems(page);
 }
Ejemplo n.º 15
0
        /// <summary>
        /// Imports historical fan counts from CSV files. Each record has a
        /// "Dates" column (yyyy-MM-dd) and one column per page name holding the
        /// number of likes on that date. For every parseable cell, the matching
        /// scraped page for that date is created or updated and then saved.
        /// </summary>
        /// <param name="fanCountCSVs">The CSV inputs handed to <c>Read</c>.</param>
        /// <returns>The saved pages, in the order they were processed.</returns>
        public IEnumerable <ScrapedPage> ImportPages(IEnumerable <string> fanCountCSVs)
        {
            var pages       = new List <ScrapedPage>();
            int numberSaved = 0;

            Read(fanCountCSVs, record =>
            {
                // The date is a string in a 2016-12-25 format.
                string dateString = (string)record["Dates"];
                if (string.IsNullOrEmpty(dateString) || dateString == "Date")
                {
                    // Skip the header if it isn't parsed, and rows without a date.
                    return;
                }

                // The CSV is machine-formatted, so parse with the invariant culture
                // rather than whatever culture the current thread happens to use.
                DateTime date = DateTime.ParseExact(dateString, "yyyy-MM-dd", CultureInfo.InvariantCulture);

                // Now get the list of all the pages.
                foreach (string pageName in record.Keys)
                {
                    // Skip all columns that are empty or are the "Dates" field.
                    if (pageName == "" || pageName == "Dates")
                    {
                        continue;
                    }

                    // Yuck: page names have varying degrees of leading and trailing whitespace.
                    // Yuck: page names for the same page vary between instances.
                    PageMetadata mappedPage = Mappings[pageName.Trim()];

                    // Now get the number of likes from the table.
                    // Yuck: some data is missing, or contains letters in.
                    // Yuck: some full numbers have decimal points in.
                    string numberOfLikesAsString = (string)record[pageName];
                    if (!int.TryParse(numberOfLikesAsString, NumberStyles.AllowDecimalPoint, CultureInfo.InvariantCulture, out int numberOfLikes))
                    {
                        // If we can't parse the number of likes as an actual number, skip it.
                        Console.WriteLine("Can't parse number of likes");
                        continue;
                    }

                    // Add this to the fan count history.
                    ScrapedPage savedPage = PageScraper.Closest(p => p.Name, mappedPage.Name, date);
                    if (savedPage == null || savedPage.Date != date)
                    {
                        // Page doesn't have this date already. Add it.
                        savedPage = new ScrapedPage
                        {
                            Id         = Guid.NewGuid().ToString(),
                            Name       = mappedPage.Name,
                            Category   = mappedPage.Category,
                            FacebookId = mappedPage.FacebookId,
                            Date       = date,
                            FanCount   = numberOfLikes
                        };
                    }
                    else
                    {
                        // Page already has this date. Update it.
                        savedPage.FanCount = numberOfLikes;
                    }

                    // Save the page and report progress.
                    numberSaved++;
                    Console.WriteLine(numberSaved);
                    pages.Add(PageScraper.Save(savedPage, Refresh.False));
                }
            });

            return pages;
        }
Ejemplo n.º 16
0
        /// <summary>
        /// Scores every image of the page and returns those scoring above 1,
        /// best first. Points are awarded for file type (jpg/jpeg +2, png +1),
        /// for size, for a good perspective on large JPEGs and for a long alt
        /// text; images whose source looks like a logo, sprite or loading
        /// indicator lose 2 points. GIFs receive no size or alt-text points.
        /// </summary>
        /// <param name="page">Page whose <c>Images</c> are scored; <c>Host</c> is passed to <c>FixPath</c>.</param>
        /// <returns>The kept images ordered by descending <c>Score</c>.</returns>
        private List <Img> ScoreImages(ScrapedPage page)
        {
            var list = new List <Img>();

            foreach (var img in page.Images)
            {
                var  score     = 0;
                bool isJpg     = false;
                bool isGif     = false;
                bool fetchSize = false;

                FixPath(img, page.Host);

                // Is it a jpg or png? If so, up its score.
                if (SrcContainsIgnoreCase(img, ".jpg") || SrcContainsIgnoreCase(img, ".jpeg"))
                {
                    // Set the local flag as well as the image's: the original only
                    // set img.IsJpg, so the perspective bonus below never applied.
                    isJpg     = true;
                    img.IsJpg = true;
                    score    += 2;
                }
                else if (SrcContainsIgnoreCase(img, ".png"))
                {
                    score++;
                }
                else if (SrcContainsIgnoreCase(img, ".gif"))
                {
                    isGif = true;
                }
                else
                {
                    // Unknown type: always measure it.
                    fetchSize = true;
                }

                // Measure images that have no dimensions yet or are of unknown type.
                if (!img.HasDimensions || fetchSize)
                {
                    SizeImage(img);
                }

                if (!isGif)
                {
                    if (img.Width > 100 || img.Height > 100)
                    {
                        score++;
                    }

                    // Is it big?
                    if (img.CombinedSize > 200)
                    {
                        score++;

                        // Does it have a good perspective?
                        if (isJpg && img.Perspective < 2)
                        {
                            score++;
                        }
                    }

                    // Does it have a descriptive alt text? Alt may be absent.
                    if (img.Alt != null && img.Alt.Length > 20)
                    {
                        score++;
                    }
                }

                // Knock it down if it looks like a logo, sprite or loading indicator.
                if (SrcContainsIgnoreCase(img, "logo") ||
                    SrcContainsIgnoreCase(img, "sprite") ||
                    SrcContainsIgnoreCase(img, "loading"))
                {
                    score -= 2;
                }

                img.Score += score;

                if (score > 1)
                {
                    list.Add(img);
                }
            }

            return list.OrderByDescending(x => x.Score).ToList();
        }

        /// <summary>
        /// Culture-independent, case-insensitive check whether the image source
        /// contains the given token.
        /// </summary>
        private static bool SrcContainsIgnoreCase(Img img, string token)
        {
            return img.Src.IndexOf(token, System.StringComparison.OrdinalIgnoreCase) >= 0;
        }
Ejemplo n.º 17
0
 /// <summary>
 /// Hook for derived classes to add the items of the given page.
 /// The default implementation does nothing.
 /// </summary>
 /// <param name="page">Scraped page whose items should be added.</param>
 protected virtual void OnAddItems(ScrapedPage page)
 {
     // Intentionally empty.
 }