/// <summary>
/// Populates the flow layout with one web-view widget per displayable video,
/// removing the privacy label first and interleaving horizontal ad widgets
/// (after every fourth item, including the first) when ads are enabled.
/// </summary>
/// <param name="page">Scraped page whose videos are rendered.</param>
protected override void OnAddItems(ScrapedPage page)
{
    // The privacy label is only relevant before content arrives; drop it now.
    if (this.kryptonPanel1.Controls.Contains(this.kryptonLabelPrivacy))
    {
        this.kryptonPanel1.Controls.Remove(this.kryptonLabelPrivacy);
    }

    for (var index = 0; index < Constants.GetMaxDisplayCount(page.Videos.Count); index++)
    {
        var video = page.Videos[index];

        // Fill the HTML template placeholders with this video's details.
        var html = Properties.Resources.TestHtml
            .Replace("{0}", video.Url)
            .Replace("{1}", video.ImageUrl)
            .Replace("{2}", video.Title)
            .Replace("{3}", video.Duration.ToString());

        var widget = new WebViewWidget(video, html);
        this.flowLayoutPanel.Controls.Add(widget);
        widget.ViewSelected += (sender, e) => OnItemSelected(sender, e);

        if (Constants.ShowAds && index % 4 == 0)
        {
            this.flowLayoutPanel.Controls.Add(CreateAdWidget(Constants.HorizontalAdId));
        }
    }
}
/// <summary>
/// Binds a scraped page to the view window: remembers it as the last page,
/// shows the "more" button only when a next page exists, and fills the gallery.
/// </summary>
/// <param name="page">Scraped page to display.</param>
public void Initialize(ScrapedPage page)
{
    LogManager.Log("Initializing view window");
    this.lastPage = page;

    var hasNextPage = GetNextPage() != null;
    this.buttonSpecHeaderGroupMore.Visible = hasNextPage;

    this.verticalSingleColumnGalleryWidget1.AddItems(page);
}
/// <summary>
/// Scrapes the configured URL and extracts an IP address from the response.
/// </summary>
/// <returns>
/// The extracted IP address string, or null when the request fails, the
/// response is empty, or nothing matching could be extracted. All exceptions
/// are swallowed (deliberate best-effort behavior).
/// </returns>
public override string GetIpAddress()
{
    try
    {
        var scraper = new Scraper();
        PrepareScraper(scraper);

        ScrapedPage checkip = scraper.Scrape(ScrapeType.GET, m_url);
        if (checkip == null || checkip.RawStream == null)
        {
            return null;
        }

        if (!string.IsNullOrEmpty(m_successRegex))
        {
            // A success regex is configured: pull the capture group out of the body.
            return RegexUtilities.Extract(checkip.RawStream, m_successRegex, m_captureIndex);
        }

        // No regex configured: accept the trimmed body only if it contains a
        // positive number (a crude "looks like an IP" check).
        string result = checkip.RawStream.Trim();
        if (StringUtilities.ExtractFirstNumber(result) > 0)
        {
            return result;
        }
    }
    catch (Exception)
    {
        // Deliberately best-effort: any failure is reported as "unknown" (null).
    }

    return null;
}
/// <summary>
/// Scrape-completion callback: clears the application's busy state, then
/// initializes this window with the result on the UI thread.
/// </summary>
/// <param name="page">The freshly scraped page.</param>
public void OnScrapVideoCompleted(ScrapedPage page)
{
    Program.SetIdle();
    this.InvokeEx(() => Initialize(page));
}
/// <summary>
/// Scrape-completion callback: logs the video count, then on the UI thread
/// tears down the loading label and initializes both child windows.
/// </summary>
/// <param name="page">The freshly scraped page.</param>
public void OnScrapVideoCompleted(ScrapedPage page)
{
    LogManager.Log("Download complete. Total videos: " + page.Videos.Count);

    this.InvokeEx(() =>
    {
        // Content is ready; the loading indicator is no longer needed.
        this.labelLoading.Dispose();

        this.viewerWindow.Initialize(page);
        this.browseWindow.Initialize(page);
    });
}
/// <summary>
/// Adds one gallery item per displayable video, inserting a vertical ad
/// widget after every fourth item (including the first) when ads are enabled.
/// </summary>
/// <param name="page">Scraped page whose videos are rendered.</param>
protected override void OnAddItems(ScrapedPage page)
{
    for (var index = 0; index < Constants.GetMaxDisplayCount(page.Videos.Count); index++)
    {
        AddItem(page.Videos[index]);

        if (Constants.ShowAds && index % 4 == 0)
        {
            this.tableLayoutPanel.Controls.Add(CreateAdWidget(Constants.VerticalAdId));
        }
    }
}
/// <summary>
/// Renders the index view. When a "url" query parameter is supplied, the page
/// at that URL is scraped and used as the model; otherwise the model is null.
/// </summary>
/// <returns>The index view, with a <see cref="ScrapedPage"/> model or null.</returns>
public ActionResult Index()
{
    // FIX: the original allocated a ScrapedPage that was always overwritten or
    // nulled before use, called a redundant ToString() on a string, and
    // duplicated the View(page) call across both branches.
    string url = Request.QueryString["url"] ?? string.Empty;
    ScrapedPage page = string.IsNullOrEmpty(url) ? null : GetPage(url);
    return View(page);
}
/// <summary>
/// Scrapes the requested pages (or all known pages when the request body is
/// null), prepends each page's new fan count to its history, and persists a
/// history record describing this scrape run.
/// </summary>
/// <param name="request">Page ids to scrape, or null to scrape every page.</param>
/// <returns>The saved scrape-history record for this run.</returns>
public PageScrapeHistory ScrapePages([FromBody] IEnumerable <string> request)
{
    // If no pages were specified, scrape them all.
    PageMetadata[] pagesToScrape;
    if (request == null)
    {
        pagesToScrape = PageMetadataRepository.All().Data.ToArray();
    }
    else
    {
        pagesToScrape = request.Select(id => PageMetadataRepository.Get(id)).ToArray();
    }

    DateTime scrapeStart = DateTime.Now;
    ScrapedPage[] pages = PageScraper.Scrape(pagesToScrape, scrapeStart).ToArray();

    // Now update the per-page list of all scraped pages.
    foreach (PageMetadata pageMetadata in pagesToScrape)
    {
        // NOTE(review): First() throws if the scrape produced no entry for this
        // page — presumably Scrape returns one result per input; confirm.
        ScrapedPage scrapedPage = pages.First(p => p.FacebookId == pageMetadata.FacebookId);
        pageMetadata.FanCountHistory.Insert(0, new DatedFanCount
        {
            Date = scrapedPage.Date,
            FanCount = scrapedPage.FanCount,
        });
        pageMetadata.LatestScrape = scrapeStart;
        PageMetadataRepository.Save(pageMetadata, Refresh.False);

        // Only save the fan count on this date.
        // NOTE: this truncation deliberately happens AFTER the repository save
        // above, so the full history is persisted per page while the in-memory
        // copy embedded in pageScrapeHistory below carries only this run's
        // data point. Do not reorder these statements.
        pageMetadata.FanCountHistory = pageMetadata.FanCountHistory.Take(1).ToList();
    }

    // Now update the total-page list of the scrape.
    var pageScrapeHistory = new PageScrapeHistory
    {
        Id = Guid.NewGuid().ToString(),
        ImportStart = scrapeStart,
        ImportEnd = DateTime.Now,
        Pages = pagesToScrape
    };

    return(PageScrapeHistoryRepository.Save(pageScrapeHistory));
}
/// <summary>
/// Downloads the document at the given URL and builds a ScrapedPage from its
/// title, images, links, keywords and description; when any images are found
/// they are scored and filtered via ScoreImages.
/// </summary>
/// <param name="url">Absolute URL of the page to load.</param>
/// <returns>The populated page model.</returns>
private ScrapedPage GetPage(string url)
{
    var web = new HtmlWeb();
    HtmlDocument doc = web.Load(url);

    var page = new ScrapedPage
    {
        URL = url,
        Host = web.ResponseUri.Host,
        Title = GetTitle(doc),
        Images = GetImages(doc),
        Links = GetLinks(doc),
        Keywords = GetKeywords(doc),
        Description = GetDescription(doc),
    };

    // Rank and filter the images only when the page actually has some.
    if (page.Images.Count > 0)
    {
        page.Images = ScoreImages(page);
    }

    return page;
}
/// <summary>
/// Fills the flow layout with one web-view widget per displayable video while
/// layout is suspended, then forces a resize pass, adds link widgets and
/// notifies the base class.
/// </summary>
/// <param name="page">Scraped page whose videos are rendered.</param>
protected override void OnAddItems(ScrapedPage page)
{
    this.flowLayoutPanel.SuspendLayout();
    try
    {
        for (var i = 0; i < Constants.GetMaxDisplayCount(page.Videos.Count); i++)
        {
            var video = page.Videos[i];

            // Fill the HTML template placeholders with this video's details.
            var html = Properties.Resources.TestHtml
                .Replace("{0}", video.Url)
                .Replace("{1}", video.ImageUrl)
                .Replace("{2}", video.Title)
                .Replace("{3}", video.Duration.ToString());

            var widget = new WebViewWidget(video, html);
            this.flowLayoutPanel.Controls.Add(widget);
            widget.ViewSelected += delegate(object sender, GalleryItemSelectedEventArgs e)
            {
                OnItemSelected(sender, e);
            };
        }
    }
    finally
    {
        // FIX: resume layout even if widget creation throws, so the panel is
        // never left permanently suspended.
        this.flowLayoutPanel.ResumeLayout();
    }

    // NOTE(review): the resize handler is invoked with null event args —
    // assumes it never dereferences them; confirm.
    flowLayoutPanel_Resize(this, null);
    AddLinks();
    base.OnAddItems(page);
}
/// <summary>
/// Stamps scrape metadata onto a post: a "lat,lng" geo point when the post's
/// place has a location, the closest scraped page snapshot (by name and
/// creation time), and the scrape timestamps.
/// </summary>
/// <param name="post">Post to update in place.</param>
/// <param name="pageName">Name of the page the post belongs to.</param>
public void UpdateMetadata(ScrapedPost post, string pageName)
{
    // Update the database with the new post.
    Location location = post.Place?.Location;
    post.GeoPoint = location == null
        ? null
        : $"{location.Latitude},{location.Longitude}";

    // Associate the page snapshot nearest to when the post was created.
    post.Page = PageScraper.Closest(p => p.Name, pageName, post.CreatedTime);

    post.LastScraped = DateTime.Now;
    if (post.Scraped == DateTime.MinValue)
    {
        // First time this post is seen: record the initial scrape time too.
        post.Scraped = post.LastScraped;
    }
}
/// <summary>
/// Durable activity: unpacks the (html, suburb, totalPages, pageNumber) input
/// tuple into a ScrapedPage payload, serializes it to JSON and enqueues it on
/// the Service Bus collector, then logs the progress.
/// </summary>
public static async Task StorePageContent(
    [ActivityTrigger] DurableActivityContext ctx,
    [ServiceBus("%ServiceBusQueue%")] IAsyncCollector <string> collector,
    TraceWriter log)
{
    var arguments = ctx.GetInput <Tuple <string, string, int, int> >();

    var page = new ScrapedPage
    {
        html = arguments.Item1,
        suburb = arguments.Item2,
        total = arguments.Item3,
        number = arguments.Item4,
    };

    await collector.AddAsync(JsonConvert.SerializeObject(page));

    // NOTE(review): message reads "or" — likely intended "of" ({number} of {total}).
    log.Warning($"Page content stored {page.number} or {page.total}");
}
/// <summary>
/// Resets the browse gallery and repopulates it from the scraped page.
/// </summary>
/// <param name="page">Scraped page whose items fill the gallery.</param>
public void Initialize(ScrapedPage page)
{
    var gallery = this.browseGalleryWidget;
    gallery.ClearItems();
    gallery.AddItems(page);
}
/// <summary>
/// Records the page as the current one, then delegates item creation to the
/// subclass hook OnAddItems.
/// </summary>
/// <param name="page">Scraped page to add items from.</param>
public void AddItems(ScrapedPage page)
{
    this.currentPage = page;
    OnAddItems(page);
}
/// <summary>
/// Imports historical fan counts from CSV exports. Each record has a "Dates"
/// column plus one column per page name; every parsable count is merged into
/// the closest existing ScrapedPage for that date (or a new snapshot) and
/// saved via the scraper repository.
/// </summary>
/// <param name="fanCountCSVs">Raw CSV contents to read.</param>
/// <returns>Every page snapshot that was saved.</returns>
public IEnumerable <ScrapedPage> ImportPages(IEnumerable <string> fanCountCSVs)
{
    var pages = new List <ScrapedPage>();
    // NOTE(review): 'now' is never used below — candidate for removal.
    DateTime now = DateTime.Now;
    int numberSaved = 0;
    Read(fanCountCSVs, record =>
    {
        // The date is a string in a 2016-12-25 format.
        string dateString = (string)record["Dates"];
        if (dateString == "Date" || dateString == "")
        {
            // Skip the header if it isn't parsed.
            return;
        }
        DateTime date = DateTime.ParseExact(dateString, "yyyy-MM-dd", null);

        // Now get the list of all the pages.
        foreach (string pageName in record.Keys)
        {
            // Skip all columns that are empty or are the "Dates" field.
            if (pageName == "" || pageName == "Dates")
            {
                continue;
            }

            // Yuck: page names have varying degrees of leading and trailing whitespace.
            // Yuck: page names for the same page vary between instances.
            // NOTE(review): an unmapped page name throws KeyNotFoundException here.
            PageMetadata mappedPage = Mappings[pageName.Trim()];

            // Now get the number of likes from the table.
            // Yuck: some data is missing, or contains letters in.
            // Yuck: some full numbers have decimal points in.
            string numberOfLikesAsString = (string)record[pageName];
            if (!int.TryParse(numberOfLikesAsString, NumberStyles.AllowDecimalPoint, null, out int numberOfLikes))
            {
                // If we can't parse the number of likes as an actual number, skip it.
                Console.WriteLine("Can't parse number of likes");
                continue;
            }

            // Add this to the fan count history.
            ScrapedPage savedPage = PageScraper.Closest(p => p.Name, mappedPage.Name, date);
            if (savedPage == null || savedPage.Date != date)
            {
                // Page doesn't have this date already. Add it.
                savedPage = new ScrapedPage
                {
                    Id = Guid.NewGuid().ToString(),
                    Name = mappedPage.Name,
                    Category = mappedPage.Category,
                    FacebookId = mappedPage.FacebookId,
                    Date = date,
                    FanCount = numberOfLikes
                };
            }
            else
            {
                // Page already has this date already. Update it.
                savedPage.FanCount = numberOfLikes;
            }

            // Save the page.
            numberSaved++;
            Console.WriteLine(numberSaved);
            pages.Add(PageScraper.Save(savedPage, Refresh.False));
        }
    });
    return(pages);
}
/// <summary>
/// Scores each image on the page by format, dimensions, combined size,
/// perspective and alt text, penalizes likely page chrome (logos, sprites,
/// loading graphics), and returns the images scoring above 1, best first.
/// </summary>
/// <param name="page">Page whose Images are scored; Host is used to fix relative paths.</param>
/// <returns>Images with a per-pass score > 1, ordered by descending total score.</returns>
private List <Img> ScoreImages(ScrapedPage page)
{
    var list = new List <Img>();
    foreach (var img in page.Images)
    {
        var score = 0;
        bool isJpg = false;
        bool isGif = false;
        bool fetchSize = false;

        // Resolve relative src paths against the page host before inspecting them.
        FixPath(img, page.Host);

        // Hoisted: the original lowercased img.Src on every comparison.
        string src = img.Src.ToLower();

        // Is it a jpg or png? If so, up its score.
        if (src.Contains(".jpg") || src.Contains(".jpeg"))
        {
            // BUG FIX: the local isJpg flag was never set (only img.IsJpg),
            // so the jpg perspective bonus below could never be awarded.
            isJpg = true;
            img.IsJpg = true;
            score = (score + 2);
        }
        else if (src.Contains(".png"))
        {
            // (Removed the unused local isPng flag from the original.)
            score++;
        }
        else if (src.Contains(".gif"))
        {
            isGif = true;
        }
        else
        {
            // Unknown extension: we'll need to fetch the size explicitly.
            fetchSize = true;
        }

        // Only resize when dimensions are missing or the format is unknown.
        if (!img.HasDimensions || fetchSize)
        {
            SizeImage(img);
        }

        if (!isGif)
        {
            if (img.Width > 100 || img.Height > 100)
            {
                score++;
            }
            // Is it big?
            if (img.CombinedSize > 200)
            {
                score++;
                // Does it have a good perspective?
                // (presumably an aspect-ratio measure — TODO confirm)
                if (isJpg && img.Perspective < 2)
                {
                    score++;
                }
            }
        }

        // Does it have a meaningful alt tag?
        if (!isGif && img.Alt.Length > 20)
        {
            score++;
        }

        // Knock it down if it's a logo / sprite / loading graphic.
        if (src.Contains("logo") || src.Contains("sprite") || src.Contains("loading"))
        {
            score = (score - 2);
        }

        img.Score = img.Score + score;
        if (score > 1)
        {
            list.Add(img);
        }
    }

    return list.OrderByDescending(x => x.Score).ToList();
}
/// <summary>
/// Hook for derived galleries to create widgets for the page's items.
/// The base implementation does nothing.
/// </summary>
/// <param name="page">The scraped page whose items should be added.</param>
protected virtual void OnAddItems(ScrapedPage page) { }