/// <summary>
/// Builds a GET request for <paramref name="url"/>, attaches a bearer-token
/// Authorization header and returns the result of reading the request to its end.
/// </summary>
/// <param name="url">Address the GET request is created for.</param>
/// <param name="bearerToken">Token appended to the literal "Bearer " in the Authorization header.</param>
/// <param name="headers">Optional headers handed to the request factory; may be null.</param>
/// <param name="cookieHosts">
/// Optional host URIs; each one is passed to <c>CookieService.GetUriCookie</c> together with the
/// request's cookie container (presumably to load stored cookies for that host - confirm in CookieService).
/// </param>
/// <returns>The string produced by <c>WebRequestFactory.ReadRequestToEndAsync</c>.</returns>
protected async Task<string> RequestApiDataAsync(string url, string bearerToken, Dictionary<string, string> headers = null, IEnumerable<string> cookieHosts = null)
{
    var abortRegistration = default(CancellationTokenRegistration);
    try
    {
        HttpWebRequest apiRequest = WebRequestFactory.CreateGetRequest(url, string.Empty, headers);

        // A missing host list is treated as an empty one.
        if (cookieHosts == null)
        {
            cookieHosts = new List<string>();
        }

        foreach (string host in cookieHosts)
        {
            var hostUri = new Uri(host);
            CookieService.GetUriCookie(apiRequest.CookieContainer, hostUri);
        }

        apiRequest.PreAuthenticate = true;
        apiRequest.Headers.Add("Authorization", "Bearer " + bearerToken);
        apiRequest.Accept = "application/json";

        // Abort the in-flight request as soon as Ct is signalled.
        abortRegistration = Ct.Register(() => apiRequest.Abort());

        string responseText = await WebRequestFactory.ReadRequestToEndAsync(apiRequest);
        return responseText;
    }
    finally
    {
        // Always drop the callback so it cannot fire against a finished request.
        abortRegistration.Dispose();
    }
}
/// <summary>
/// Sends the XHR POST for one page of the tumblr search results of <c>Blog.Name</c>
/// and returns the result of reading the request to its end.
/// </summary>
/// <param name="pageNumber">
/// Page to request. It is used in the URL, as <c>post_page</c> in the body, and
/// (minus one, times <c>Blog.PageSize</c>) as the <c>num_posts_shown</c>/<c>before</c> offsets.
/// </param>
/// <returns>The string produced by <c>WebRequestFactory.ReadRequestToEndAsync</c>.</returns>
protected virtual async Task<string> RequestPostAsync(int pageNumber)
{
    var requestRegistration = new CancellationTokenRegistration();
    try
    {
        string url = "https://www.tumblr.com/search/" + Blog.Name + "/post_page/" + pageNumber;
        string referer = @"https://www.tumblr.com/search/" + Blog.Name;
        var headers = new Dictionary<string, string> { { "X-tumblr-form-key", tumblrKey }, { "DNT", "1" } };
        HttpWebRequest request = WebRequestFactory.CreatePostXhrRequest(url, referer, headers);
        CookieService.GetTumblrConsentCookies(request.CookieContainer);

        // Register the abort callback BEFORE the first awaited network call so that a
        // cancellation arriving while the POST body is being sent also aborts the request
        // (previously the registration only happened after the upload had completed).
        requestRegistration = Ct.Register(() => request.Abort());

        //Example request body, searching for cars:
        //q=cars&sort=top&post_view=masonry&blogs_before=8&num_blogs_shown=8&num_posts_shown=20&before=24&blog_page=2&safe_mode=true&post_page=2&filter_nsfw=true&filter_post_type=&next_ad_offset=0&ad_placement_id=0&more_posts=true
        // Number of posts on the pages preceding the requested one; sent as both offsets.
        var postsAlreadyShown = (pageNumber - 1) * Blog.PageSize;
        string requestBody = "q=" + Blog.Name
            + "&sort=top&post_view=masonry&num_posts_shown=" + postsAlreadyShown
            + "&before=" + postsAlreadyShown
            + "&safe_mode=false&post_page=" + pageNumber
            + "&filter_nsfw=false&filter_post_type=&next_ad_offset=0&ad_placement_id=0&more_posts=true";

        await WebRequestFactory.PerformPostXHRRequestAsync(request, requestBody);
        return await WebRequestFactory.ReadRequestToEndAsync(request);
    }
    finally
    {
        requestRegistration.Dispose();
    }
}
/// <summary>
/// Returns the cached <c>guestToken</c>, requesting one first when none is cached yet.
/// </summary>
/// <remarks>
/// Any exception raised while requesting or parsing the token is logged and swallowed,
/// so the method returns null when the token could not be obtained.
/// </remarks>
/// <returns>The value of the <c>guestToken</c> field after the attempt.</returns>
private async Task<string> GetGuestToken()
{
    // Already fetched earlier: nothing to do.
    if (guestToken != null)
    {
        return guestToken;
    }

    var abortRegistration = default(CancellationTokenRegistration);
    try
    {
        string tokenUrl = await GetApiUrl(Blog.Url, 0, "", 0);

        if (ShellService.Settings.LimitConnectionsApi)
        {
            // Presumably a rate limiter for API connections - confirm in CrawlerService.
            CrawlerService.TimeconstraintApi.Acquire();
        }

        var requestHeaders = new Dictionary<string, string>
        {
            { "Origin", "https://twitter.com" },
            { "Authorization", "Bearer " + BearerToken }
        };

        HttpWebRequest tokenRequest = WebRequestFactory.CreatePostRequest(tokenUrl, "https://twitter.com", requestHeaders);
        CookieService.GetUriCookie(tokenRequest.CookieContainer, new Uri("https://twitter.com"));

        // Abort the in-flight request as soon as Ct is signalled.
        abortRegistration = Ct.Register(() => tokenRequest.Abort());

        var responseText = await WebRequestFactory.ReadRequestToEndAsync(tokenRequest);

        // The response is expected to be a JSON object carrying a "guest_token" value.
        var responseJson = (JObject)JsonConvert.DeserializeObject(responseText);
        var tokenNode = (JValue)responseJson["guest_token"];
        guestToken = tokenNode.Value<string>();
    }
    catch (Exception e)
    {
        Logger.Error("GetGuestToken: {0}", e);
    }
    finally
    {
        abortRegistration.Dispose();
    }

    return guestToken;
}
/// <summary>
/// Tries to resolve the URL of the original-size image behind a "/s1280x1920/" image URL.
/// </summary>
/// <param name="url">Image URL; only rewritten when it contains "/s1280x1920/".</param>
/// <param name="width">Image width; swapped with <paramref name="height"/> when it is the larger value.</param>
/// <param name="height">Image height.</param>
/// <returns>
/// <paramref name="url"/> unchanged when the image-size setting is not "best", the URL has no
/// "/s1280x1920/" segment, or the image already fits into 1280x1920. Otherwise the URL of the
/// image flagged as having original dimensions, or the rewritten (larger size segment) URL when
/// the page could not be downloaded or no such image was found.
/// </returns>
protected string RetrieveOriginalImageUrl(string url, int width, int height)
{
    // Normalize so that width holds the smaller and height the larger dimension.
    if (width > height)
    {
        (width, height) = (height, width);
    }

    if (ShellService.Settings.ImageSize != "best" || !url.Contains("/s1280x1920/") || (width <= 1280 && height <= 1920))
    {
        return url;
    }

    url = url.Replace("/s1280x1920/", (width <= 2048 && height <= 3072) ? "/s2048x3072/" : "/s99999x99999/");

    string pageContent;
    try
    {
        HttpWebRequest request = WebRequestFactory.CreateGetRequest(url, "", new Dictionary<string, string>() { { "Accept-Language", "en-US" }, { "Accept-Encoding", "gzip, deflate" } }, false);
        request.Accept = "text/html, application/xhtml+xml, */*";
        request.UserAgent = ShellService.Settings.UserAgent;
        request.AutomaticDecompression = DecompressionMethods.Deflate | DecompressionMethods.GZip;
        // NOTE(review): blocks on async I/O because this method's signature is synchronous.
        pageContent = WebRequestFactory.ReadRequestToEndAsync(request).GetAwaiter().GetResult();
    }
    catch (Exception e)
    {
        Logger.Error("TumblrBlogCrawler:RetrieveRawImageUrl:Exception {0}", e);
        return url;
    }

    // Nothing captured means there is no JSON to inspect: keep the rewritten URL.
    string json = extractJsonFromPage.Match(pageContent).Groups[1].Value;
    if (string.IsNullOrEmpty(json))
    {
        return url;
    }

    ImageResponse imgRsp = ConvertJsonToClass<ImageResponse>(json);

    // The predicate used to be an assignment (HasOriginalDimensions = true), which always
    // picked the first image and overwrote its flag; select by the flag instead.
    Image img = imgRsp?.Images?.FirstOrDefault(x => x.HasOriginalDimensions);

    // img is null when no image carries the flag; fall back to the rewritten URL.
    return string.IsNullOrEmpty(img?.MediaKey) ? url : img.Url;
}
/// <summary>
/// Sends an XHR GET to tumblr's <c>svc/indash_blog</c> endpoint for <c>Blog.Name</c>
/// and returns the result of reading the request to its end.
/// </summary>
/// <param name="limit">Value placed in the <c>limit</c> query parameter.</param>
/// <param name="offset">Value placed in the <c>offset</c> query parameter.</param>
/// <returns>The string produced by <c>WebRequestFactory.ReadRequestToEndAsync</c>.</returns>
protected virtual async Task<string> RequestDataAsync(string limit, string offset)
{
    var abortRegistration = default(CancellationTokenRegistration);
    try
    {
        string serviceUrl = @"https://www.tumblr.com/svc/indash_blog?tumblelog_name_or_id=" + Blog.Name
            + @"&post_id=&limit=" + limit
            + "&offset=" + offset
            + "&should_bypass_safemode=true";
        string dashboardReferer = @"https://www.tumblr.com/dashboard/blog/" + Blog.Name;

        var xhrHeaders = new Dictionary<string, string>();
        xhrHeaders.Add("X-tumblr-form-key", tumblrKey);

        HttpWebRequest xhrRequest = WebRequestFactory.CreateGetXhrRequest(serviceUrl, dashboardReferer, xhrHeaders);

        // Cookie lookups for the main site and for the blog's own subdomain
        // ('+' in the blog name is replaced by '-' to form the host name).
        var tumblrUri = new Uri("https://www.tumblr.com/");
        CookieService.GetUriCookie(xhrRequest.CookieContainer, tumblrUri);
        var blogUri = new Uri("https://" + Blog.Name.Replace("+", "-") + ".tumblr.com");
        CookieService.GetUriCookie(xhrRequest.CookieContainer, blogUri);

        // Abort the in-flight request as soon as Ct is signalled.
        abortRegistration = Ct.Register(() => xhrRequest.Abort());

        string responseText = await WebRequestFactory.ReadRequestToEndAsync(xhrRequest);
        return responseText;
    }
    finally
    {
        abortRegistration.Dispose();
    }
}
/// <summary>
/// Tries to resolve the URL of the widest available image behind a "/s1280x1920/" image URL,
/// retrying the page download up to three times.
/// </summary>
/// <param name="url">Image URL; only rewritten when it contains "/s1280x1920/".</param>
/// <param name="width">Image width; swapped with <paramref name="height"/> when it is the larger value.</param>
/// <param name="height">Image height.</param>
/// <returns>
/// <paramref name="url"/> unchanged when the image-size setting is not "best", the URL has no
/// "/s1280x1920/" segment, or the image already fits into 1280x1920. Otherwise the URL of the
/// widest image found on the page, or the rewritten URL when the server answers 404 or the
/// widest image has no media key.
/// </returns>
/// <exception cref="NullReferenceException">
/// Thrown (wrapping the underlying error) when the download fails three times or the page cannot
/// be parsed. The exception type is kept as-is because callers may depend on it - TODO confirm
/// and migrate to a more specific type.
/// </exception>
protected string RetrieveOriginalImageUrl(string url, int width, int height)
{
    const int MaxAttempts = 3;
    const int RetryDelayStepMs = 10000;

    // Normalize so that width holds the smaller and height the larger dimension.
    if (width > height)
    {
        (width, height) = (height, width);
    }

    if (ShellService.Settings.ImageSize != "best" || !url.Contains("/s1280x1920/") || (width <= 1280 && height <= 1920))
    {
        return url;
    }

    url = url.Replace("/s1280x1920/", (width <= 2048 && height <= 3072) ? "/s2048x3072/" : "/s99999x99999/");

    string pageContent = "";
    int errCnt = 0;
    Exception lastError = null;
    bool downloaded = false;

    while (!downloaded && errCnt < MaxAttempts)
    {
        try
        {
            HttpWebRequest request = WebRequestFactory.CreateGetRequest(url, "", new Dictionary<string, string>() { { "Accept-Language", "en-US" }, { "Accept-Encoding", "gzip, deflate" } }, false);
            request.Accept = "text/html, application/xhtml+xml, */*";
            request.UserAgent = ShellService.Settings.UserAgent;
            request.AutomaticDecompression = DecompressionMethods.Deflate | DecompressionMethods.GZip;
            // NOTE(review): blocks on async I/O because this method's signature is synchronous.
            pageContent = WebRequestFactory.ReadRequestToEndAsync(request).GetAwaiter().GetResult();
            downloaded = true;
        }
        catch (WebException we) when ((we.Response as HttpWebResponse)?.StatusCode == HttpStatusCode.NotFound)
        {
            // 404: there is no page to inspect, keep the rewritten URL.
            return url;
        }
        catch (Exception e)
        {
            // Every other failure - including non-404 WebExceptions, which were previously
            // swallowed without being counted and so retried forever - is a counted attempt.
            errCnt++;
            Logger.Error("AbstractTumblrCrawler:RetrieveOriginalImageUrl: {0}", e);
            lastError = e;
            if (errCnt < MaxAttempts)
            {
                // Linear back-off: 10 s after the first failure, 20 s after the second.
                Thread.Sleep(errCnt * RetryDelayStepMs);
            }
        }
    }

    if (!downloaded)
    {
        ShellService.ShowError(lastError, Resources.PostNotParsable, Blog.Name);
        throw new NullReferenceException("RetrieveOriginalImageUrl download", lastError);
    }

    try
    {
        string extracted = extractJsonFromPage.Match(pageContent).Groups[1].Value;
        // Replace every "/.../" span (first slash through the last slash on a line) with an
        // empty JSON string; presumably this strips content that breaks deserialization - confirm.
        extracted = Regex.Replace(extracted, "/.*/", "\"\"");
        ImageResponse imgRsp = DeserializeImageResponse(extracted);
        int maxWidth = imgRsp.Images.Max(x => x.Width);
        Image img = imgRsp.Images.FirstOrDefault(x => x.Width == maxWidth);
        return string.IsNullOrEmpty(img?.MediaKey) ? url : img.Url;
    }
    catch (Exception ex)
    {
        Logger.Error("AbstractTumblrCrawler:RetrieveOriginalImageUrl: {0}", ex);
        ShellService.ShowError(ex, Resources.PostNotParsable, Blog.Name);
        throw new NullReferenceException("RetrieveOriginalImageUrl parsing", ex);
    }
}