/// <summary>
/// Sends an XHR POST to the tumblr search endpoint for <c>Blog.Name</c> and the given result page.
/// </summary>
/// <param name="pageNumber">
/// Page to request. It is placed in the URL and in the <c>post_page</c> field, and
/// <c>(pageNumber - 1) * Blog.PageSize</c> is sent as both <c>num_posts_shown</c> and <c>before</c>.
/// </param>
/// <returns>
/// The string produced by <c>WebRequestFactory.ReadReqestToEndAsync</c> for the request
/// (presumably the response body - confirm against the factory).
/// </returns>
protected virtual async Task<string> RequestPostAsync(int pageNumber)
{
    string url = "https://www.tumblr.com/search/" + Blog.Name + "/post_page/" + pageNumber;
    string referer = @"https://www.tumblr.com/search/" + Blog.Name;
    var headers = new Dictionary<string, string> { { "X-tumblr-form-key", tumblrKey }, { "DNT", "1" } };
    HttpWebRequest request = WebRequestFactory.CreatePostXhrReqeust(url, referer, headers);
    CookieService.GetUriCookie(request.CookieContainer, new Uri("https://www.tumblr.com/"));

    // Example request body, searching for cars:
    // q=cars&sort=top&post_view=masonry&blogs_before=8&num_blogs_shown=8&num_posts_shown=20&before=24&blog_page=2&safe_mode=true&post_page=2&filter_nsfw=true&filter_post_type=&next_ad_offset=0&ad_placement_id=0&more_posts=true
    var postsAlreadyShown = (pageNumber - 1) * Blog.PageSize;
    string requestBody = "q=" + Blog.Name + "&sort=top&post_view=masonry&num_posts_shown=" + postsAlreadyShown
        + "&before=" + postsAlreadyShown + "&safe_mode=false&post_page=" + pageNumber
        + "&filter_nsfw=false&filter_post_type=&next_ad_offset=0&ad_placement_id=0&more_posts=true";

    // Register the abort callback BEFORE the body is uploaded so that a cancellation
    // arriving during the upload also aborts the request, not only one arriving
    // while the response is being read.
    using (Ct.Register(() => request.Abort()))
    {
        await WebRequestFactory.PerformPostXHRReqeustAsync(request, requestBody);
        return await WebRequestFactory.ReadReqestToEndAsync(request);
    }
}
/// <summary>
/// Issues a GET request to <paramref name="url"/> carrying a bearer token and asking for JSON.
/// </summary>
/// <param name="url">Address to request.</param>
/// <param name="bearerToken">Token sent as <c>Authorization: Bearer &lt;token&gt;</c>.</param>
/// <param name="headers">Optional extra headers handed to <c>WebRequestFactory.CreateGetRequest</c>.</param>
/// <param name="cookieHosts">
/// Optional URIs; each one is passed to <c>CookieService.GetUriCookie</c> together with
/// the request's cookie container. <c>null</c> is treated as "none".
/// </param>
/// <returns>The string produced by <c>WebRequestFactory.ReadRequestToEndAsync</c> for the request.</returns>
protected async Task<string> RequestApiDataAsync(string url, string bearerToken, Dictionary<string, string> headers = null, IEnumerable<string> cookieHosts = null)
{
    HttpWebRequest apiRequest = WebRequestFactory.CreateGetRequest(url, string.Empty, headers);

    if (cookieHosts != null)
    {
        foreach (string host in cookieHosts)
        {
            CookieService.GetUriCookie(apiRequest.CookieContainer, new Uri(host));
        }
    }

    apiRequest.PreAuthenticate = true;
    apiRequest.Headers.Add("Authorization", "Bearer " + bearerToken);
    apiRequest.Accept = "application/json";

    // Abort the request if the crawler's token is cancelled while the response is read.
    using (Ct.Register(() => apiRequest.Abort()))
    {
        return await WebRequestFactory.ReadRequestToEndAsync(apiRequest);
    }
}
/// <summary>
/// Returns the cached <c>guestToken</c>, requesting one first when none is cached yet.
/// </summary>
/// <remarks>
/// Any exception raised while fetching or parsing is logged and swallowed, so the
/// method can return <c>null</c> when the token could not be obtained.
/// </remarks>
/// <returns>The value of the <c>guestToken</c> field after the attempt.</returns>
private async Task<string> GetGuestToken()
{
    if (guestToken != null)
    {
        return guestToken;
    }

    var abortRegistration = new CancellationTokenRegistration();
    try
    {
        string tokenUrl = await GetApiUrl(Blog.Url, 0, "", 0);
        if (ShellService.Settings.LimitConnectionsApi)
        {
            CrawlerService.TimeconstraintApi.Acquire();
        }

        var requestHeaders = new Dictionary<string, string>
        {
            { "Origin", "https://twitter.com" },
            { "Authorization", "Bearer " + BearerToken }
        };
        HttpWebRequest tokenRequest = WebRequestFactory.CreatePostRequest(tokenUrl, "https://twitter.com", requestHeaders);
        CookieService.GetUriCookie(tokenRequest.CookieContainer, new Uri("https://twitter.com"));
        abortRegistration = Ct.Register(() => tokenRequest.Abort());

        var responseText = await WebRequestFactory.ReadRequestToEndAsync(tokenRequest);

        // The response is expected to be a JSON object with a "guest_token" value.
        var responseJson = (JObject)JsonConvert.DeserializeObject(responseText);
        var tokenValue = (JValue)responseJson["guest_token"];
        guestToken = tokenValue.Value<string>();
    }
    catch (Exception e)
    {
        Logger.Error("GetGuestToken: {0}", e);
    }
    finally
    {
        abortRegistration.Dispose();
    }

    return guestToken;
}
/// <summary>
/// Issues a GET request to <paramref name="url"/> and follows <c>302 Found</c> responses manually.
/// </summary>
/// <param name="url">Address to request first.</param>
/// <param name="headers">Optional extra headers handed to <c>WebRequestFactory.CreateGetRequest</c>.</param>
/// <param name="cookieHosts">
/// Optional URIs; each one is passed to <c>CookieService.GetUriCookie</c> together with
/// every request's cookie container. <c>null</c> is treated as "none".
/// </param>
/// <returns>The <c>Response</c> of the last response received.</returns>
/// <exception cref="WebException">
/// Thrown with <see cref="WebExceptionStatus.ProtocolError"/> when the last response
/// is still <c>302 Found</c> after the redirect limit was reached.
/// </exception>
protected async Task<string> RequestDataAsync(string url, Dictionary<string, string> headers = null, IEnumerable<string> cookieHosts = null)
{
    cookieHosts = cookieHosts ?? new List<string>();

    int redirects = 0;
    ResponseDetails responseDetails;
    do
    {
        HttpWebRequest request = WebRequestFactory.CreateGetRequest(url, string.Empty, headers, false);
        foreach (string cookieHost in cookieHosts)
        {
            CookieService.GetUriCookie(request.CookieContainer, new Uri(cookieHost));
        }

        // Scope the abort registration to this single request. Keeping one variable
        // for the whole loop (as before) disposed only the last registration and left
        // the earlier ones, each holding a request, attached to the token.
        using (Ct.Register(() => request.Abort()))
        {
            responseDetails = await WebRequestFactory.ReadRequestToEnd2Async(request);
        }

        if (responseDetails.HttpStatusCode == HttpStatusCode.Found)
        {
            // The redirect target is appended to the scheme and host of the request just made.
            url = request.RequestUri.GetLeftPart(UriPartial.Authority) + responseDetails.RedirectUrl;
        }
    } while (responseDetails.HttpStatusCode == HttpStatusCode.Found && redirects++ < 5);

    if (responseDetails.HttpStatusCode == HttpStatusCode.Found)
    {
        throw new WebException("Too many automatic redirections were attempted.", WebExceptionStatus.ProtocolError);
    }

    return responseDetails.Response;
}
/// <summary>
/// Issues a GET request to <paramref name="url"/> and returns what the request factory reads from it.
/// </summary>
/// <param name="url">Address to request.</param>
/// <param name="headers">Optional extra headers handed to <c>WebRequestFactory.CreateGetReqeust</c>.</param>
/// <param name="cookieHosts">
/// Optional URIs; each one is passed to <c>CookieService.GetUriCookie</c> together with
/// the request's cookie container. <c>null</c> is treated as "none".
/// </param>
/// <returns>The string produced by <c>WebRequestFactory.ReadReqestToEndAsync</c> for the request.</returns>
protected async Task<string> RequestDataAsync(string url, Dictionary<string, string> headers = null, IEnumerable<string> cookieHosts = null)
{
    HttpWebRequest getRequest = WebRequestFactory.CreateGetReqeust(url, string.Empty, headers);

    if (cookieHosts != null)
    {
        foreach (string host in cookieHosts)
        {
            CookieService.GetUriCookie(getRequest.CookieContainer, new Uri(host));
        }
    }

    // Abort the request if the crawler's token is cancelled while the response is read.
    using (Ct.Register(() => getRequest.Abort()))
    {
        return await WebRequestFactory.ReadReqestToEndAsync(getRequest);
    }
}
/// <summary>
/// Requests posts of <c>Blog.Name</c> from tumblr's <c>svc/indash_blog</c> endpoint via an XHR GET.
/// </summary>
/// <param name="limit">Value placed in the <c>limit</c> query parameter.</param>
/// <param name="offset">Value placed in the <c>offset</c> query parameter.</param>
/// <returns>The string produced by <c>WebRequestFactory.ReadRequestToEndAsync</c> for the request.</returns>
protected virtual async Task<string> RequestDataAsync(string limit, string offset)
{
    string endpoint = @"https://www.tumblr.com/svc/indash_blog?tumblelog_name_or_id=" + Blog.Name
        + @"&post_id=&limit=" + limit
        + "&offset=" + offset
        + "&should_bypass_safemode=true";
    string dashboardReferer = @"https://www.tumblr.com/dashboard/blog/" + Blog.Name;
    var xhrHeaders = new Dictionary<string, string>
    {
        { "X-tumblr-form-key", tumblrKey }
    };

    HttpWebRequest xhrRequest = WebRequestFactory.CreateGetXhrRequest(endpoint, dashboardReferer, xhrHeaders);

    // Cookies are looked up for the main site and for the blog's own subdomain
    // ('+' in the blog name is replaced by '-' to form the host name).
    var mainSite = new Uri("https://www.tumblr.com/");
    CookieService.GetUriCookie(xhrRequest.CookieContainer, mainSite);
    var blogSite = new Uri("https://" + Blog.Name.Replace("+", "-") + ".tumblr.com");
    CookieService.GetUriCookie(xhrRequest.CookieContainer, blogSite);

    // Abort the request if the crawler's token is cancelled while the response is read.
    using (Ct.Register(() => xhrRequest.Abort()))
    {
        return await WebRequestFactory.ReadRequestToEndAsync(xhrRequest);
    }
}
/// <summary>
/// Issues a GET request to <paramref name="url"/>, manually following <c>302 Found</c> and
/// <c>301 Moved</c> responses.
/// </summary>
/// <remarks>
/// On a <c>301</c> whose target authority does not contain <c>".tumblr."</c>, <c>Blog.Url</c>
/// is updated to that authority. Every exception is logged and rethrown.
/// </remarks>
/// <param name="url">Address to request first.</param>
/// <param name="headers">Optional extra headers handed to <c>WebRequestFactory.CreateGetRequest</c>.</param>
/// <param name="cookieHosts">
/// Optional URIs; each one is passed to <c>CookieService.GetUriCookie</c> together with
/// every request's cookie container. <c>null</c> is treated as "none".
/// </param>
/// <returns>The <c>Response</c> of the last response received.</returns>
/// <exception cref="WebException">
/// Thrown with <see cref="WebExceptionStatus.ProtocolError"/> when the last response
/// is still <c>302 Found</c> after the redirect limit was reached.
/// </exception>
/// <exception cref="Exception">
/// Thrown when a <c>302</c> points at a URL containing <c>privacy/consent</c>; the error
/// is also shown through <c>ShellService.ShowError</c> beforehand.
/// </exception>
protected async Task<string> RequestDataAsync(string url, Dictionary<string, string> headers = null, IEnumerable<string> cookieHosts = null)
{
    try
    {
        cookieHosts = cookieHosts ?? new List<string>();

        int redirects = 0;
        ResponseDetails responseDetails;
        do
        {
            HttpWebRequest request = WebRequestFactory.CreateGetRequest(url, string.Empty, headers, false);
            foreach (string cookieHost in cookieHosts)
            {
                CookieService.GetUriCookie(request.CookieContainer, new Uri(cookieHost));
            }

            // Scope the abort registration to this single request. Keeping one variable
            // for the whole loop (as before) disposed only the last registration and left
            // the earlier ones, each holding a request, attached to the token.
            using (Ct.Register(() => request.Abort()))
            {
                responseDetails = await WebRequestFactory.ReadRequestToEnd2Async(request);
            }

            url = responseDetails.RedirectUrl ?? url;

            if (responseDetails.HttpStatusCode == HttpStatusCode.Found)
            {
                if (url.Contains("privacy/consent"))
                {
                    var ex = new Exception("Acceptance of privacy consent needed!");
                    ShellService.ShowError(new TumblrPrivacyConsentException(ex), Resources.ConfirmationTumblrPrivacyConsentNeeded);
                    throw ex;
                }

                // A target that does not start with "http" is treated as relative to the host just requested.
                if (!url.StartsWith("http", StringComparison.InvariantCultureIgnoreCase))
                {
                    url = request.RequestUri.GetLeftPart(UriPartial.Authority) + url;
                }
            }

            if (responseDetails.HttpStatusCode == HttpStatusCode.Moved)
            {
                Uri uri = new Uri(url);
                if (!uri.Authority.Contains(".tumblr."))
                {
                    Blog.Url = uri.GetLeftPart(UriPartial.Authority);
                }
            }
        } while ((responseDetails.HttpStatusCode == HttpStatusCode.Found || responseDetails.HttpStatusCode == HttpStatusCode.Moved) && redirects++ < 5);

        // NOTE(review): only a trailing 302 is reported as "too many redirects"; a trailing
        // 301 falls through and its Response is returned. Left unchanged - confirm intent.
        if (responseDetails.HttpStatusCode == HttpStatusCode.Found)
        {
            throw new WebException("Too many automatic redirections were attempted.", WebExceptionStatus.ProtocolError);
        }

        return responseDetails.Response;
    }
    catch (Exception e)
    {
        Logger.Error("AbstractCrawler.RequestDataAsync: {0}", e);
        throw;
    }
}
/// <summary>
/// Tries to resolve a larger variant of an image URL by rewriting its size segment,
/// downloading the page served at the rewritten URL and reading the image list embedded in it.
/// </summary>
/// <remarks>
/// The download is attempted at most three times; after a failed attempt the thread
/// sleeps <c>attempt * 10000</c> ms before retrying. This method blocks the calling thread.
/// </remarks>
/// <param name="url">Image URL to upgrade.</param>
/// <param name="width">Image width; swapped with <paramref name="height"/> when it is the larger of the two.</param>
/// <param name="height">Image height.</param>
/// <param name="isInline">
/// When <c>true</c>, any <c>/s{w}x{h}/</c> segment is rewritten to <c>/s2048x3072/</c>;
/// otherwise only <c>/s1280x1920/</c> is rewritten.
/// </param>
/// <returns>
/// <paramref name="url"/> unchanged when the <c>ImageSize</c> setting is not <c>"best"</c>, the
/// image fits within 1280x1920, or the URL has no rewritable size segment. Otherwise the
/// <c>Url</c> of the widest image found in the page, or the rewritten URL when the page
/// request returns 404 or the widest image has no <c>MediaKey</c>.
/// </returns>
/// <exception cref="NullReferenceException">
/// Thrown when the request is cancelled, when all download attempts fail, or when the
/// downloaded page cannot be parsed (kept as-is because callers may catch this type).
/// </exception>
protected string RetrieveOriginalImageUrl(string url, int width, int height, bool isInline)
{
    const string sizeSegmentPattern = @"\/s[\d]{2,4}x[\d]{2,4}\/";
    const int maxAttempts = 3;

    // Normalize so that width is the smaller dimension.
    if (width > height)
    {
        (width, height) = (height, width);
    }

    if (ShellService.Settings.ImageSize != "best"
        || !isInline && !url.Contains("/s1280x1920/")
        || (width <= 1280 && height <= 1920)
        || isInline && !Regex.IsMatch(url, sizeSegmentPattern))
    {
        return url;
    }

    if (isInline)
    {
        url = Regex.Replace(url, sizeSegmentPattern, "/s2048x3072/");
    }
    else
    {
        url = url.Replace("/s1280x1920/", (width <= 2048 && height <= 3072) ? "/s2048x3072/" : "/s99999x99999/");
    }

    string pageContent = "";
    int errCnt = 0;
    bool downloaded = false;
    Exception lastError = null;
    do
    {
        try
        {
            HttpWebRequest request = WebRequestFactory.CreateGetRequest(url, "",
                new Dictionary<string, string>() { { "Accept-Language", "en-US" }, { "Accept-Encoding", "gzip, deflate" } }, false);
            request.Accept = "text/html, application/xhtml+xml, */*";
            request.UserAgent = ShellService.Settings.UserAgent;
            request.AutomaticDecompression = DecompressionMethods.Deflate | DecompressionMethods.GZip;
            using (Ct.Register(() => request.Abort()))
            {
                pageContent = WebRequestFactory.ReadRequestToEndAsync(request).GetAwaiter().GetResult();
            }

            downloaded = true;
        }
        catch (WebException we) when (we.Status == WebExceptionStatus.RequestCanceled)
        {
            throw new NullReferenceException("RetrieveOriginalImageUrl request cancelled");
        }
        catch (WebException we) when ((we.Response as HttpWebResponse)?.StatusCode == HttpStatusCode.NotFound)
        {
            // The rewritten URL has no page behind it; hand the rewritten URL back as-is.
            return url;
        }
        catch (Exception e)
        {
            // Every other failure - including WebExceptions that are neither a cancellation
            // nor a 404 - counts as a failed attempt. Previously such WebExceptions were not
            // counted, so the loop retried forever without any delay or logging.
            errCnt++;
            Logger.Error("AbstractTumblrCrawler:RetrieveOriginalImageUrl: {0}", e);
            lastError = e;
            if (errCnt < maxAttempts)
            {
                Thread.Sleep(errCnt * 10000);
            }
        }
    } while (!downloaded && errCnt < maxAttempts);

    if (!downloaded)
    {
        ShellService.ShowError(lastError, Resources.PostNotParsable, Blog.Name);
        throw new NullReferenceException("RetrieveOriginalImageUrl download", lastError);
    }

    try
    {
        var extracted = extractJsonFromPage.Match(pageContent).Groups[1].Value;

        // Replaces each "/.../" run (first to last slash on a line) with an empty JSON
        // string before deserializing - presumably to drop embedded URLs; confirm.
        extracted = Regex.Replace(extracted, "/.*/", "\"\"");

        ImageResponse imgRsp = DeserializeImageResponse(extracted);
        int maxWidth = imgRsp.Images.Max(x => x.Width);
        Image img = imgRsp.Images.FirstOrDefault(x => x.Width == maxWidth);
        return string.IsNullOrEmpty(img?.MediaKey) ? url : img.Url;
    }
    catch (Exception ex)
    {
        Logger.Error("AbstractTumblrCrawler:RetrieveOriginalImageUrl: {0}", ex);
        ShellService.ShowError(ex, Resources.PostNotParsable, Blog.Name);
        throw new NullReferenceException("RetrieveOriginalImageUrl parsing", ex);
    }
}