Ejemplo n.º 1
0
        /// <summary>
        /// Downloads the page named by <paramref name="configuration"/> and collects the URLs found in
        /// its <c>img</c>, <c>a</c> and <c>video &gt; source</c> elements whose file extension is listed in
        /// <c>PeruserConfig.Current.AllowedFileTypes</c>.
        /// </summary>
        /// <param name="configuration">
        /// Supplies the page URL, the user agent and cookies sent with the request, an optional title
        /// override, and whether images nested directly inside links are skipped.
        /// </param>
        /// <returns>One <see cref="ImageData"/> per accepted URL, with <c>Path</c> made absolute.</returns>
        /// <remarks>Side effect: sets <c>_pageTitle</c> and <c>_sourceUrl</c>.</remarks>
        public IEnumerable <ImageData> ScrapePage(WebpageConfig configuration)
        {
            Uri url = new Uri(configuration.PageUrl);
            CQ  dom;

            // "using" guarantees the client is released even when the download throws.
            using (WebClient client = new WebClient())
            {
                client.Headers.Add(HttpRequestHeader.UserAgent, configuration.UserAgent);

                // Send all cookies in ONE Cookie header separated by "; ". Adding the header once per
                // cookie makes WebHeaderCollection join the values with commas, which is not valid
                // Cookie header syntax.
                List<string> cookiePairs = new List<string>();
                foreach (var entry in configuration.RealCookies)
                {
                    cookiePairs.Add(String.Format("{0}={1}", entry.Key, entry.Value));
                }
                if (cookiePairs.Count > 0)
                {
                    client.Headers.Add(HttpRequestHeader.Cookie, String.Join("; ", cookiePairs));
                }

                dom = client.DownloadString(url);
            }

            // Optionally ignore images that are direct children of links (i.e. thumbnails).
            string imageSelector = configuration.SkipImagesInsideLinks ? ":not(a) > img" : "img";

            var imageLinks = dom[imageSelector].ToList().Select(d => d.GetAttribute("src"));
            var textLinks  = dom["a"].ToList().Select(d => d.GetAttribute("href"));
            var vidLinks   = dom["video > source:first-child"].ToList().Select(d => d.GetAttribute("src"));

            // The override wins only when it actually contains text; otherwise use the page's <title>.
            _pageTitle = String.IsNullOrWhiteSpace(configuration.TitleOverride)
                         ? dom["title"].Text().Trim()
                         : configuration.TitleOverride;
            _sourceUrl = url.ToString();

            // Base for relative links: scheme + host + every path segment except the last one.
            Uri baseUri = new Uri(url.Scheme + "://" + url.Host + String.Join("", url.Segments.Reverse().Skip(1).Reverse()));

            // Union already removes duplicates, so no separate Distinct() pass is needed.
            List <ImageData> genericLinks = imageLinks.Union(textLinks).Union(vidLinks)
                                            .Where(d =>
                                            {
                                                string extension = Path.GetExtension(d);
                                                // Invariant lower-casing keeps the match culture-independent
                                                // (e.g. "GIF" must become "gif" under every locale).
                                                return !String.IsNullOrWhiteSpace(extension) &&
                                                       PeruserConfig.Current.AllowedFileTypes.Contains(extension.Substring(1).ToLowerInvariant());
                                            })
                                            .Select(d =>
                                            {
                                                // Lambda-local out target instead of a captured outer variable.
                                                Uri parsed;
                                                return IsOkayUri(d, baseUri, out parsed) ? parsed : null;
                                            })
                                            .Where(d => d != null)
                                            .Select(d => new ImageData
            {
                FileName     = Path.GetFileName(d.ToString()),
                LastModified = DateTime.Now,
                Path         = (d.IsAbsoluteUri ? d.ToString() : new Uri(baseUri, d).ToString())
            }).ToList();

            return(genericLinks);
        }
Ejemplo n.º 2
0
 /// <summary>
 /// Builds the library by scraping the configured page and exposing the results through
 /// the observable <c>Images</c> collection.
 /// </summary>
 /// <param name="configuration">Page settings handed straight to <c>ScrapePage</c>.</param>
 private WebpageLibrary(WebpageConfig configuration)
 {
     var scrapedImages = ScrapePage(configuration);

     Images = new ObservableCollection <ImageData>(scrapedImages);
 }
Ejemplo n.º 3
0
 /// <summary>
 /// Creates the page-chooser with a fresh, empty <c>WebpageConfig</c> and enables transparency.
 /// </summary>
 public ChoosePage()
 {
     // NOTE(review): Config is assigned before InitializeComponent(), presumably so that anything
     // created there that reads Config sees a non-null value - confirm before reordering.
     Config = new WebpageConfig();
     InitializeComponent();
     AllowsTransparency = true;
 }