Exemplo n.º 1
0
        /// <summary>
        /// Returns the cached guest token, requesting a new one first when none is cached.
        /// </summary>
        /// <returns>
        /// The current value of <c>guestToken</c>. It is still <c>null</c> when the request or the
        /// parsing of its response failed, because failures are only logged here.
        /// </returns>
        private async Task<string> GetGuestToken()
        {
            // Nothing to do when a token has already been obtained.
            if (guestToken != null)
            {
                return guestToken;
            }

            CancellationTokenRegistration abortRegistration = default(CancellationTokenRegistration);
            try
            {
                string apiUrl = await GetApiUrl(Blog.Url, 0, "", 0);

                if (ShellService.Settings.LimitConnectionsApi)
                {
                    CrawlerService.TimeconstraintApi.Acquire();
                }

                var requestHeaders = new Dictionary<string, string>
                {
                    { "Origin", "https://twitter.com" },
                    { "Authorization", "Bearer " + BearerToken }
                };
                HttpWebRequest request = WebRequestFactory.CreatePostRequest(apiUrl, "https://twitter.com", requestHeaders);
                CookieService.GetUriCookie(request.CookieContainer, new Uri("https://twitter.com"));

                // Abort the pending request when the crawler's cancellation token fires.
                abortRegistration = Ct.Register(() => request.Abort());
                string responseBody = await WebRequestFactory.ReadRequestToEndAsync(request);

                var json = (JObject)JsonConvert.DeserializeObject(responseBody);
                var tokenValue = (JValue)json["guest_token"];
                guestToken = tokenValue.Value<string>();
            }
            catch (Exception e)
            {
                // Best effort: the failure is logged and the (still null) token is returned below.
                Logger.Error("GetGuestToken: {0}", e);
            }
            finally
            {
                abortRegistration.Dispose();
            }

            return guestToken;
        }
Exemplo n.º 2
0
        /// <summary>
        /// Posts a Tumblr search request for one result page of <c>Blog.Name</c> and returns the
        /// response body.
        /// </summary>
        /// <param name="pageNumber">
        /// Page number placed in the URL and request body; the post offsets sent are
        /// <c>(pageNumber - 1) * Blog.PageSize</c>.
        /// </param>
        /// <returns>The content read from the response.</returns>
        protected virtual async Task <string> RequestPostAsync(int pageNumber)
        {
            var requestRegistration = new CancellationTokenRegistration();

            try
            {
                string url     = "https://www.tumblr.com/search/" + Blog.Name + "/post_page/" + pageNumber;
                string referer = @"https://www.tumblr.com/search/" + Blog.Name;
                var    headers = new Dictionary <string, string> {
                    { "X-tumblr-form-key", tumblrKey }, { "DNT", "1" }
                };
                HttpWebRequest request = WebRequestFactory.CreatePostXhrReqeust(url, referer, headers);
                CookieService.GetUriCookie(request.CookieContainer, new Uri("https://www.tumblr.com/"));

                //Example request body, searching for cars:
                //q=cars&sort=top&post_view=masonry&blogs_before=8&num_blogs_shown=8&num_posts_shown=20&before=24&blog_page=2&safe_mode=true&post_page=2&filter_nsfw=true&filter_post_type=&next_ad_offset=0&ad_placement_id=0&more_posts=true

                var    postOffset  = (pageNumber - 1) * Blog.PageSize;
                string requestBody = "q=" + Blog.Name + "&sort=top&post_view=masonry&num_posts_shown=" +
                                     postOffset + "&before=" + postOffset +
                                     "&safe_mode=false&post_page=" + pageNumber +
                                     "&filter_nsfw=false&filter_post_type=&next_ad_offset=0&ad_placement_id=0&more_posts=true";

                // Register the abort callback before any network I/O so that cancellation also
                // interrupts the POST itself, not only the reading of the response.
                requestRegistration = Ct.Register(() => request.Abort());

                await WebRequestFactory.PerformPostXHRReqeustAsync(request, requestBody);
                return(await WebRequestFactory.ReadReqestToEndAsync(request));
            }
            finally
            {
                requestRegistration.Dispose();
            }
        }
Exemplo n.º 3
0
        /// <summary>
        /// Issues a GET request carrying a bearer token and returns the response body.
        /// </summary>
        /// <param name="url">Address to request.</param>
        /// <param name="bearerToken">Token sent in the <c>Authorization: Bearer</c> header.</param>
        /// <param name="headers">Optional headers handed to the request factory.</param>
        /// <param name="cookieHosts">Optional host URIs whose cookies are copied into the request.</param>
        /// <returns>The content read from the response.</returns>
        protected async Task<string> RequestApiDataAsync(string url, string bearerToken, Dictionary<string, string> headers = null,
                                                         IEnumerable<string> cookieHosts = null)
        {
            CancellationTokenRegistration abortRegistration = default(CancellationTokenRegistration);

            try
            {
                HttpWebRequest apiRequest = WebRequestFactory.CreateGetRequest(url, string.Empty, headers);

                if (cookieHosts != null)
                {
                    foreach (string host in cookieHosts)
                    {
                        CookieService.GetUriCookie(apiRequest.CookieContainer, new Uri(host));
                    }
                }

                apiRequest.PreAuthenticate = true;
                apiRequest.Headers.Add("Authorization", "Bearer " + bearerToken);
                apiRequest.Accept = "application/json";

                // Abort the pending request when the crawler's cancellation token fires.
                abortRegistration = Ct.Register(() => apiRequest.Abort());
                string responseBody = await WebRequestFactory.ReadRequestToEndAsync(apiRequest);
                return responseBody;
            }
            finally
            {
                abortRegistration.Dispose();
            }
        }
Exemplo n.º 4
0
        /// <summary>
        /// Issues a GET request and follows HTTP 302 (Found) redirects manually, returning the body
        /// of the final response.
        /// </summary>
        /// <param name="url">Address to request first.</param>
        /// <param name="headers">Optional headers handed to the request factory.</param>
        /// <param name="cookieHosts">Optional host URIs whose cookies are copied into each request.</param>
        /// <returns>The content of the last response received.</returns>
        /// <exception cref="WebException">
        /// Thrown when the response is still a redirect after the redirect limit has been reached.
        /// </exception>
        protected async Task <string> RequestDataAsync(string url, Dictionary <string, string> headers = null,
                                                       IEnumerable <string> cookieHosts = null)
        {
            var requestRegistration = new CancellationTokenRegistration();

            try
            {
                int             redirects = 0;
                ResponseDetails responseDetails;

                cookieHosts = cookieHosts ?? new List <string>();

                do
                {
                    HttpWebRequest request = WebRequestFactory.CreateGetRequest(url, string.Empty, headers, false);
                    foreach (string cookieHost in cookieHosts)
                    {
                        CookieService.GetUriCookie(request.CookieContainer, new Uri(cookieHost));
                    }

                    // Drop the registration of the previous hop before creating a new one; otherwise
                    // every redirect leaves a callback (and its request) attached to the token.
                    requestRegistration.Dispose();
                    requestRegistration = Ct.Register(() => request.Abort());
                    responseDetails     = await WebRequestFactory.ReadRequestToEnd2Async(request);

                    if (responseDetails.HttpStatusCode == HttpStatusCode.Found)
                    {
                        string location = responseDetails.RedirectUrl ?? string.Empty;

                        // Only relative redirect targets need the authority of the current request;
                        // an absolute target is used unchanged.
                        url = location.StartsWith("http", StringComparison.OrdinalIgnoreCase)
                            ? location
                            : request.RequestUri.GetLeftPart(UriPartial.Authority) + location;
                    }
                } while (responseDetails.HttpStatusCode == HttpStatusCode.Found && redirects++ < 5);

                if (responseDetails.HttpStatusCode == HttpStatusCode.Found)
                {
                    throw new WebException("Too many automatic redirections were attempted.", WebExceptionStatus.ProtocolError);
                }

                return(responseDetails.Response);
            }
            finally
            {
                requestRegistration.Dispose();
            }
        }
Exemplo n.º 5
0
        /// <summary>
        /// Issues a GET request and returns the response body.
        /// </summary>
        /// <param name="url">Address to request.</param>
        /// <param name="headers">Optional headers handed to the request factory.</param>
        /// <param name="cookieHosts">Optional host URIs whose cookies are copied into the request.</param>
        /// <returns>The content read from the response.</returns>
        protected async Task<string> RequestDataAsync(string url, Dictionary<string, string> headers = null,
                                                      IEnumerable<string> cookieHosts = null)
        {
            CancellationTokenRegistration abortRegistration = default(CancellationTokenRegistration);

            try
            {
                HttpWebRequest getRequest = WebRequestFactory.CreateGetReqeust(url, string.Empty, headers);

                if (cookieHosts != null)
                {
                    foreach (string host in cookieHosts)
                    {
                        CookieService.GetUriCookie(getRequest.CookieContainer, new Uri(host));
                    }
                }

                // Abort the pending request when the crawler's cancellation token fires.
                abortRegistration = Ct.Register(() => getRequest.Abort());
                string responseBody = await WebRequestFactory.ReadReqestToEndAsync(getRequest);
                return responseBody;
            }
            finally
            {
                abortRegistration.Dispose();
            }
        }
Exemplo n.º 6
0
        /// <summary>
        /// Tries to resolve the URL of the original-size version of an image that is currently
        /// addressed through its <c>/s1280x1920/</c> rendition.
        /// </summary>
        /// <param name="url">Image URL to upgrade.</param>
        /// <param name="width">Image width; swapped with <paramref name="height"/> when larger so the smaller side is compared first.</param>
        /// <param name="height">Image height.</param>
        /// <returns>
        /// The unchanged <paramref name="url"/> when the image size setting is not "best", the URL has
        /// no <c>/s1280x1920/</c> segment or the image fits into 1280x1920. Otherwise the URL of the
        /// image flagged as having original dimensions, or the rewritten larger-rendition URL when the
        /// page could not be downloaded or contains no such image.
        /// </returns>
        protected string RetrieveOriginalImageUrl(string url, int width, int height)
        {
            if (width > height)
            {
                (width, height) = (height, width);
            }
            if (ShellService.Settings.ImageSize != "best" ||
                !url.Contains("/s1280x1920/") ||
                (width <= 1280 && height <= 1920))
            {
                return(url);
            }

            url = url.Replace("/s1280x1920/", (width <= 2048 && height <= 3072) ? "/s2048x3072/" : "/s99999x99999/");
            string pageContent = "";

            try
            {
                HttpWebRequest request = WebRequestFactory.CreateGetRequest(url, "",
                                                                            new Dictionary <string, string>()
                {
                    { "Accept-Language", "en-US" }, { "Accept-Encoding", "gzip, deflate" }
                }, false);
                request.Accept    = "text/html, application/xhtml+xml, */*";
                request.UserAgent = ShellService.Settings.UserAgent;
                request.AutomaticDecompression = DecompressionMethods.Deflate | DecompressionMethods.GZip;
                pageContent = WebRequestFactory.ReadRequestToEndAsync(request).GetAwaiter().GetResult();
            }
            catch (Exception e)
            {
                Logger.Error("TumblrBlogCrawler:RetrieveRawImageUrl:Exception {0}", e);
                return(url);
            }
            pageContent = extractJsonFromPage.Match(pageContent).Groups[1].Value;
            ImageResponse imgRsp = ConvertJsonToClass <ImageResponse>(pageContent);

            // Select the image that is flagged as original-size. The predicate must only read the
            // flag; an assignment here would match (and modify) whatever image comes first.
            Image img = imgRsp?.Images?.FirstOrDefault(x => x.HasOriginalDimensions);

            // img is null when the page yielded no images or none with original dimensions.
            return(string.IsNullOrEmpty(img?.MediaKey) ? url : img.Url);
        }
Exemplo n.º 7
0
        /// <summary>
        /// Requests a slice of the blog's posts from the Tumblr <c>indash_blog</c> service endpoint.
        /// </summary>
        /// <param name="limit">Value sent as the <c>limit</c> query parameter.</param>
        /// <param name="offset">Value sent as the <c>offset</c> query parameter.</param>
        /// <returns>The content read from the response.</returns>
        protected virtual async Task<string> RequestDataAsync(string limit, string offset)
        {
            CancellationTokenRegistration abortRegistration = default(CancellationTokenRegistration);

            try
            {
                string serviceUrl = @"https://www.tumblr.com/svc/indash_blog?tumblelog_name_or_id=" + Blog.Name +
                                    @"&post_id=&limit=" + limit + "&offset=" + offset + "&should_bypass_safemode=true";
                string refererUrl = @"https://www.tumblr.com/dashboard/blog/" + Blog.Name;
                var formKeyHeader = new Dictionary<string, string>();
                formKeyHeader.Add("X-tumblr-form-key", tumblrKey);

                HttpWebRequest xhrRequest = WebRequestFactory.CreateGetXhrRequest(serviceUrl, refererUrl, formKeyHeader);

                // Cookies of the main site first, then those of the blog's own subdomain.
                var mainSite = new Uri("https://www.tumblr.com/");
                CookieService.GetUriCookie(xhrRequest.CookieContainer, mainSite);
                var blogSite = new Uri("https://" + Blog.Name.Replace("+", "-") + ".tumblr.com");
                CookieService.GetUriCookie(xhrRequest.CookieContainer, blogSite);

                // Abort the pending request when the crawler's cancellation token fires.
                abortRegistration = Ct.Register(() => xhrRequest.Abort());
                string responseBody = await WebRequestFactory.ReadRequestToEndAsync(xhrRequest);
                return responseBody;
            }
            finally
            {
                abortRegistration.Dispose();
            }
        }
Exemplo n.º 8
0
        /// <summary>
        /// Issues a GET request and follows HTTP 302 (Found) and 301 (Moved) redirects manually,
        /// returning the body of the final response.
        /// </summary>
        /// <remarks>
        /// A 302 redirect to a URL containing "privacy/consent" shows an error to the user and aborts.
        /// A 301 redirect to a host that does not contain ".tumblr." updates <c>Blog.Url</c>.
        /// </remarks>
        /// <param name="url">Address to request first.</param>
        /// <param name="headers">Optional headers handed to the request factory.</param>
        /// <param name="cookieHosts">Optional host URIs whose cookies are copied into each request.</param>
        /// <returns>The content of the last response received.</returns>
        /// <exception cref="WebException">
        /// Thrown when the response is still a 302 redirect after the redirect limit has been reached.
        /// </exception>
        /// <exception cref="Exception">Thrown when Tumblr redirects to its privacy consent page.</exception>
        protected async Task <string> RequestDataAsync(string url, Dictionary <string, string> headers = null,
                                                       IEnumerable <string> cookieHosts = null)
        {
            var requestRegistration = new CancellationTokenRegistration();

            try
            {
                int             redirects = 0;
                ResponseDetails responseDetails;

                cookieHosts = cookieHosts ?? new List <string>();

                do
                {
                    HttpWebRequest request = WebRequestFactory.CreateGetRequest(url, string.Empty, headers, false);
                    foreach (string cookieHost in cookieHosts)
                    {
                        CookieService.GetUriCookie(request.CookieContainer, new Uri(cookieHost));
                    }

                    // Drop the registration of the previous hop before creating a new one; otherwise
                    // every redirect leaves a callback (and its request) attached to the token.
                    requestRegistration.Dispose();
                    requestRegistration = Ct.Register(() => request.Abort());
                    responseDetails     = await WebRequestFactory.ReadRequestToEnd2Async(request);

                    url = responseDetails.RedirectUrl ?? url;

                    if (responseDetails.HttpStatusCode == HttpStatusCode.Found)
                    {
                        if (url.Contains("privacy/consent"))
                        {
                            var ex = new Exception("Acceptance of privacy consent needed!");
                            ShellService.ShowError(new TumblrPrivacyConsentException(ex), Resources.ConfirmationTumblrPrivacyConsentNeeded);
                            throw ex;
                        }
                        // A relative redirect target is resolved against the current request's authority.
                        if (!url.StartsWith("http", StringComparison.InvariantCultureIgnoreCase))
                        {
                            url = request.RequestUri.GetLeftPart(UriPartial.Authority) + url;
                        }
                    }

                    if (responseDetails.HttpStatusCode == HttpStatusCode.Moved)
                    {
                        Uri uri = new Uri(url);
                        if (!uri.Authority.Contains(".tumblr."))
                        {
                            Blog.Url = uri.GetLeftPart(UriPartial.Authority);
                        }
                    }
                } while ((responseDetails.HttpStatusCode == HttpStatusCode.Found || responseDetails.HttpStatusCode == HttpStatusCode.Moved) && redirects++ < 5);

                // NOTE(review): only a trailing 302 is treated as "too many redirects"; a trailing 301
                // falls through and its response is returned - confirm this is intended.
                if (responseDetails.HttpStatusCode == HttpStatusCode.Found)
                {
                    throw new WebException("Too many automatic redirections were attempted.", WebExceptionStatus.ProtocolError);
                }

                return(responseDetails.Response);
            }
            catch (Exception e)
            {
                Logger.Error("AbstractCrawler.RequestDataAsync: {0}", e);
                throw;
            }
            finally
            {
                requestRegistration.Dispose();
            }
        }
Exemplo n.º 9
0
        /// <summary>
        /// Tries to resolve the URL of the widest available version of an image that is currently
        /// addressed through its <c>/s1280x1920/</c> rendition, retrying the page download on errors.
        /// </summary>
        /// <param name="url">Image URL to upgrade.</param>
        /// <param name="width">Image width; swapped with <paramref name="height"/> when larger so the smaller side is compared first.</param>
        /// <param name="height">Image height.</param>
        /// <returns>
        /// The unchanged <paramref name="url"/> when the image size setting is not "best", the URL has
        /// no <c>/s1280x1920/</c> segment or the image fits into 1280x1920. Otherwise the URL of the
        /// widest image listed on the page, or the rewritten larger-rendition URL when the page
        /// answers with HTTP 404 or the widest image has no media key.
        /// </returns>
        /// <exception cref="NullReferenceException">
        /// Thrown when the download failed three times or when the page content could not be parsed;
        /// the underlying error is the inner exception.
        /// </exception>
        protected string RetrieveOriginalImageUrl(string url, int width, int height)
        {
            if (width > height)
            {
                (width, height) = (height, width);
            }
            if (ShellService.Settings.ImageSize != "best" ||
                !url.Contains("/s1280x1920/") ||
                (width <= 1280 && height <= 1920))
            {
                return(url);
            }

            url = url.Replace("/s1280x1920/", (width <= 2048 && height <= 3072) ? "/s2048x3072/" : "/s99999x99999/");

            const int maxAttempts    = 3;
            string    pageContent    = "";
            int       failedAttempts = 0;
            bool      downloaded     = false;
            Exception lastError      = null;

            while (!downloaded && failedAttempts < maxAttempts)
            {
                try
                {
                    HttpWebRequest request = WebRequestFactory.CreateGetRequest(url, "",
                                                                                new Dictionary <string, string>()
                    {
                        { "Accept-Language", "en-US" }, { "Accept-Encoding", "gzip, deflate" }
                    }, false);
                    request.Accept    = "text/html, application/xhtml+xml, */*";
                    request.UserAgent = ShellService.Settings.UserAgent;
                    request.AutomaticDecompression = DecompressionMethods.Deflate | DecompressionMethods.GZip;
                    pageContent = WebRequestFactory.ReadRequestToEndAsync(request).GetAwaiter().GetResult();
                    downloaded  = true;
                }
                catch (WebException we) when ((we.Response as HttpWebResponse)?.StatusCode == HttpStatusCode.NotFound)
                {
                    // The larger rendition's page does not exist; hand back the rewritten URL as is.
                    return(url);
                }
                catch (Exception e)
                {
                    // Every other failure, including non-404 WebExceptions, counts as a failed attempt.
                    // Leaving them uncounted would retry a persistent error forever without any delay.
                    failedAttempts++;
                    Logger.Error("AbstractTumblrCrawler:RetrieveOriginalImageUrl: {0}", e);
                    lastError = e;
                    if (failedAttempts < maxAttempts)
                    {
                        // Back off 10 s, then 20 s, before the next attempt.
                        Thread.Sleep(failedAttempts * 10000);
                    }
                }
            }

            if (!downloaded)
            {
                ShellService.ShowError(lastError, Resources.PostNotParsable, Blog.Name);
                throw new NullReferenceException("RetrieveOriginalImageUrl download", lastError);
            }

            try
            {
                var extracted = extractJsonFromPage.Match(pageContent).Groups[1].Value;
                extracted = new Regex("/.*/").Replace(extracted, "\"\"");
                ImageResponse imgRsp   = DeserializeImageResponse(extracted);
                int           maxWidth = imgRsp.Images.Max(x => x.Width);
                Image         img      = imgRsp.Images.FirstOrDefault(x => x.Width == maxWidth);
                return(string.IsNullOrEmpty(img?.MediaKey) ? url : img.Url);
            }
            catch (Exception ex)
            {
                Logger.Error("AbstractTumblrCrawler:RetrieveOriginalImageUrl: {0}", ex);
                ShellService.ShowError(ex, Resources.PostNotParsable, Blog.Name);
                throw new NullReferenceException("RetrieveOriginalImageUrl parsing", ex);
            }
        }