Пример #1
0
        protected async Task <string> RequestApiDataAsync(string url, string bearerToken, Dictionary <string, string> headers = null,
                                                          IEnumerable <string> cookieHosts = null)
        {
            var requestRegistration = new CancellationTokenRegistration();

            try
            {
                HttpWebRequest request = WebRequestFactory.CreateGetRequest(url, string.Empty, headers);
                cookieHosts = cookieHosts ?? new List <string>();
                foreach (string cookieHost in cookieHosts)
                {
                    CookieService.GetUriCookie(request.CookieContainer, new Uri(cookieHost));
                }

                request.PreAuthenticate = true;
                request.Headers.Add("Authorization", "Bearer " + bearerToken);
                request.Accept = "application/json";

                requestRegistration = Ct.Register(() => request.Abort());
                return(await WebRequestFactory.ReadRequestToEndAsync(request));
            }
            finally
            {
                requestRegistration.Dispose();
            }
        }
Пример #2
0
        protected virtual async Task <string> RequestPostAsync(int pageNumber)
        {
            var requestRegistration = new CancellationTokenRegistration();

            try
            {
                string url     = "https://www.tumblr.com/search/" + Blog.Name + "/post_page/" + pageNumber;
                string referer = @"https://www.tumblr.com/search/" + Blog.Name;
                var    headers = new Dictionary <string, string> {
                    { "X-tumblr-form-key", tumblrKey }, { "DNT", "1" }
                };
                HttpWebRequest request = WebRequestFactory.CreatePostXhrRequest(url, referer, headers);
                CookieService.GetTumblrConsentCookies(request.CookieContainer);

                //Example request body, searching for cars:
                //q=cars&sort=top&post_view=masonry&blogs_before=8&num_blogs_shown=8&num_posts_shown=20&before=24&blog_page=2&safe_mode=true&post_page=2&filter_nsfw=true&filter_post_type=&next_ad_offset=0&ad_placement_id=0&more_posts=true

                string requestBody = "q=" + Blog.Name + "&sort=top&post_view=masonry&num_posts_shown=" +
                                     ((pageNumber - 1) * Blog.PageSize) + "&before=" + ((pageNumber - 1) * Blog.PageSize) +
                                     "&safe_mode=false&post_page=" + pageNumber +
                                     "&filter_nsfw=false&filter_post_type=&next_ad_offset=0&ad_placement_id=0&more_posts=true";
                await WebRequestFactory.PerformPostXHRRequestAsync(request, requestBody);

                requestRegistration = Ct.Register(() => request.Abort());
                return(await WebRequestFactory.ReadRequestToEndAsync(request));
            }
            finally
            {
                requestRegistration.Dispose();
            }
        }
Пример #3
0
        private async Task <string> GetGuestToken()
        {
            if (guestToken == null)
            {
                var requestRegistration = new CancellationTokenRegistration();
                try
                {
                    string url = await GetApiUrl(Blog.Url, 0, "", 0);

                    if (ShellService.Settings.LimitConnectionsApi)
                    {
                        CrawlerService.TimeconstraintApi.Acquire();
                    }
                    var headers = new Dictionary <string, string>();
                    headers.Add("Origin", "https://twitter.com");
                    headers.Add("Authorization", "Bearer " + BearerToken);
                    HttpWebRequest request = WebRequestFactory.CreatePostRequest(url, "https://twitter.com", headers);
                    CookieService.GetUriCookie(request.CookieContainer, new Uri("https://twitter.com"));
                    requestRegistration = Ct.Register(() => request.Abort());
                    var content = await WebRequestFactory.ReadRequestToEndAsync(request);

                    guestToken = ((JValue)((JObject)JsonConvert.DeserializeObject(content))["guest_token"]).Value <string>();
                }
                catch (Exception e)
                {
                    Logger.Error("GetGuestToken: {0}", e);
                }
                finally
                {
                    requestRegistration.Dispose();
                }
            }
            return(guestToken);
        }
Пример #4
0
        protected string RetrieveOriginalImageUrl(string url, int width, int height)
        {
            if (width > height)
            {
                (width, height) = (height, width);
            }
            if (ShellService.Settings.ImageSize != "best" ||
                !url.Contains("/s1280x1920/") ||
                (width <= 1280 && height <= 1920))
            {
                return(url);
            }

            url = url.Replace("/s1280x1920/", (width <= 2048 && height <= 3072) ? "/s2048x3072/" : "/s99999x99999/");
            string pageContent = "";

            try
            {
                HttpWebRequest request = WebRequestFactory.CreateGetRequest(url, "",
                                                                            new Dictionary <string, string>()
                {
                    { "Accept-Language", "en-US" }, { "Accept-Encoding", "gzip, deflate" }
                }, false);
                request.Accept    = "text/html, application/xhtml+xml, */*";
                request.UserAgent = ShellService.Settings.UserAgent;
                request.AutomaticDecompression = DecompressionMethods.Deflate | DecompressionMethods.GZip;
                pageContent = WebRequestFactory.ReadRequestToEndAsync(request).GetAwaiter().GetResult();
            }
            catch (Exception e)
            {
                Logger.Error("TumblrBlogCrawler:RetrieveRawImageUrl:Exception {0}", e);
                return(url);
            }
            pageContent = extractJsonFromPage.Match(pageContent).Groups[1].Value;
            ImageResponse imgRsp = ConvertJsonToClass <ImageResponse>(pageContent);
            Image         img    = imgRsp.Images.FirstOrDefault(x => x.HasOriginalDimensions = true);

            return(string.IsNullOrEmpty(img.MediaKey) ? url : img.Url);
        }
Пример #5
0
        protected virtual async Task <string> RequestDataAsync(string limit, string offset)
        {
            var requestRegistration = new CancellationTokenRegistration();

            try
            {
                string url = @"https://www.tumblr.com/svc/indash_blog?tumblelog_name_or_id=" + Blog.Name +
                             @"&post_id=&limit=" + limit + "&offset=" + offset + "&should_bypass_safemode=true";
                string referer = @"https://www.tumblr.com/dashboard/blog/" + Blog.Name;
                var    headers = new Dictionary <string, string> {
                    { "X-tumblr-form-key", tumblrKey }
                };
                HttpWebRequest request = WebRequestFactory.CreateGetXhrRequest(url, referer, headers);
                CookieService.GetUriCookie(request.CookieContainer, new Uri("https://www.tumblr.com/"));
                CookieService.GetUriCookie(request.CookieContainer, new Uri("https://" + Blog.Name.Replace("+", "-") + ".tumblr.com"));
                requestRegistration = Ct.Register(() => request.Abort());
                return(await WebRequestFactory.ReadRequestToEndAsync(request));
            }
            finally
            {
                requestRegistration.Dispose();
            }
        }
Пример #6
0
        protected string RetrieveOriginalImageUrl(string url, int width, int height)
        {
            if (width > height)
            {
                (width, height) = (height, width);
            }
            if (ShellService.Settings.ImageSize != "best" ||
                !url.Contains("/s1280x1920/") ||
                (width <= 1280 && height <= 1920))
            {
                return(url);
            }

            url = url.Replace("/s1280x1920/", (width <= 2048 && height <= 3072) ? "/s2048x3072/" : "/s99999x99999/");
            string    pageContent = "";
            int       errCnt      = 0;
            Exception lastError   = null;

            do
            {
                try
                {
                    HttpWebRequest request = WebRequestFactory.CreateGetRequest(url, "",
                                                                                new Dictionary <string, string>()
                    {
                        { "Accept-Language", "en-US" }, { "Accept-Encoding", "gzip, deflate" }
                    }, false);
                    request.Accept    = "text/html, application/xhtml+xml, */*";
                    request.UserAgent = ShellService.Settings.UserAgent;
                    request.AutomaticDecompression = DecompressionMethods.Deflate | DecompressionMethods.GZip;
                    pageContent = WebRequestFactory.ReadRequestToEndAsync(request).GetAwaiter().GetResult();
                    errCnt      = 9;
                }
                catch (WebException we)
                {
                    if (we.Response != null && ((HttpWebResponse)we.Response).StatusCode == HttpStatusCode.NotFound)
                    {
                        return(url);
                    }
                }
                catch (Exception e)
                {
                    errCnt++;
                    Logger.Error("AbstractTumblrCrawler:RetrieveOriginalImageUrl: {0}", e);
                    lastError = e;
                    if (errCnt < 3)
                    {
                        Thread.Sleep(errCnt * 10000);
                    }
                }
            } while (errCnt < 3);
            if (errCnt == 3)
            {
                ShellService.ShowError(lastError, Resources.PostNotParsable, Blog.Name);
                throw new NullReferenceException("RetrieveOriginalImageUrl download", lastError);
            }
            try
            {
                var extracted = extractJsonFromPage.Match(pageContent).Groups[1].Value;
                extracted = new Regex("/.*/").Replace(extracted, "\"\"");
                ImageResponse imgRsp   = DeserializeImageResponse(extracted);
                int           maxWidth = imgRsp.Images.Max(x => x.Width);
                Image         img      = imgRsp.Images.FirstOrDefault(x => x.Width == maxWidth);
                return(string.IsNullOrEmpty(img?.MediaKey) ? url : img.Url);
            }
            catch (Exception ex)
            {
                Logger.Error("AbstractTumblrCrawler:RetrieveOriginalImageUrl: {0}", ex);
                ShellService.ShowError(ex, Resources.PostNotParsable, Blog.Name);
                throw new NullReferenceException("RetrieveOriginalImageUrl parsing", ex);
            }
        }