Ejemplo n.º 1
0
 /// <summary>
 /// Iterates over the posts of a search API page, hands every content element of each
 /// accepted post to the per-content <c>DownloadMedia</c> overload and queues the post
 /// itself for JSON export.
 /// </summary>
 /// <param name="page">Deserialized search API response whose posts are processed.</param>
 /// <remarks>
 /// Posts for which <c>CheckIfWithinTimespan</c> returns <c>false</c> are skipped.
 /// The method is best-effort: no exception escapes it. Timeouts are forwarded to
 /// <c>HandleTimeoutException</c>; any other failure is logged and ends processing of
 /// the remaining posts on this page.
 /// </remarks>
 private void DownloadMedia(TumblrSearchApi page)
 {
     try
     {
         foreach (var post in page.Response.Posts.Data)
         {
             if (!CheckIfWithinTimespan(post.Timestamp))
             {
                 continue;
             }

             foreach (var content in post.Content)
             {
                 DownloadMedia(content, post.Id, post.Timestamp);
             }

             // The JSON entry is named after the post id with a ".json" extension.
             AddToJsonQueue(new TumblrCrawlerData<Datum>(Path.ChangeExtension(post.Id, ".json"), post));
         }
     }
     catch (TimeoutException timeoutException)
     {
         HandleTimeoutException(timeoutException, Resources.Crawling);
     }
     catch (Exception e)
     {
         // Previously swallowed silently; keep the best-effort behavior but leave a trace
         // so malformed pages or unexpected nulls can be diagnosed.
         Logger.Error("TumblrSearchCrawler.DownloadMedia: {0}", e);
     }
 }
Ejemplo n.º 2
0
        /// <summary>
        /// Crawls the search results: processes the result set embedded in the search page,
        /// then keeps requesting the advertised "next" page through the API until no further
        /// link is present or <c>CheckIfShouldStop</c> reports that crawling should end.
        /// </summary>
        /// <returns>A task that completes when crawling has finished or was aborted.</returns>
        /// <remarks>
        /// No exception escapes this method: timeouts go to <c>HandleTimeoutException</c>, everything
        /// else is logged. <c>semaphoreSlim</c> is released on every exit path; the matching
        /// wait is not visible in this method and is presumably done by the caller.
        /// </remarks>
        private async Task CrawlPageAsync()
        {
            try
            {
                string document = await GetSearchPageAsync();

                // The first result set is embedded as JSON inside the search page itself.
                string json = extractJsonFromSearch.Match(document).Groups[1].Value;
                SearchJson result = ConvertJsonToClassNew<SearchJson>(json);
                string bearerToken = result.ApiFetchStore.APITOKEN;

                // Process the embedded page BEFORE resolving the next link: a result set
                // without a next link must still have its media downloaded instead of
                // failing on a null dereference first.
                DownloadMedia(result);

                var firstLinks = result.SearchRoute.SearchApiResponse.Response.Posts.Links;
                if (firstLinks == null || firstLinks.Next == null)
                {
                    // Single-page result: nothing more to fetch.
                    return;
                }
                string nextUrl = result.ApiUrl + firstLinks.Next.Href;

                while (true)
                {
                    if (CheckIfShouldStop())
                    {
                        return;
                    }
                    CheckIfShouldPause();

                    document = await GetRequestAsync(nextUrl, bearerToken);

                    TumblrSearchApi apiresult = ConvertJsonToClassNew<TumblrSearchApi>(document);
                    DownloadMedia(apiresult);

                    // A missing link block (or missing "next" entry) marks the last page.
                    var links = apiresult.Response.Posts.Links;
                    if (links == null || links.Next == null)
                    {
                        return;
                    }
                    nextUrl = result.ApiUrl + links.Next.Href;
                }
            }
            catch (TimeoutException timeoutException)
            {
                HandleTimeoutException(timeoutException, Resources.Crawling);
            }
            catch (Exception e)
            {
                Logger.Error("TumblrSearchCrawler.CrawlPageAsync: {0}", e);
            }
            finally
            {
                semaphoreSlim.Release();
            }
        }
Ejemplo n.º 3
0
 /// <summary>
 /// Iterates over the posts of a search API page, builds a <c>Post</c> metadata record for
 /// every content element of each accepted post, passes it to the per-content
 /// <c>DownloadMedia</c> overload and finally queues the post for JSON export.
 /// </summary>
 /// <param name="page">Deserialized search API response whose posts are processed.</param>
 /// <remarks>
 /// Posts for which <c>CheckIfWithinTimespan</c> returns <c>false</c> are skipped.
 /// The index passed along with each content element is -1 for posts with a single
 /// content element and 0, 1, 2, ... for posts with several.
 /// The method is best-effort: no exception escapes it. Timeouts are forwarded to
 /// <c>HandleTimeoutException</c>; any other failure is logged and ends processing of
 /// the remaining posts on this page.
 /// </remarks>
 private void DownloadMedia(TumblrSearchApi page)
 {
     try
     {
         foreach (var post in page.Response.Posts.Data)
         {
             if (!CheckIfWithinTimespan(post.Timestamp))
             {
                 continue;
             }

             // Only posts with more than one content element get a running index;
             // otherwise the index stays at -1 for the single element.
             bool hasMultipleParts = post.Content.Count > 1;
             int index = -1;

             foreach (var content in post.Content)
             {
                 Post data = new Post()
                 {
                     Date              = DateTimeOffset.FromUnixTimeSeconds(post.Timestamp).DateTime.ToString("yyyyMMddHHmmss"),
                     Type              = ConvertContentTypeToPostType(content.Type),
                     Id                = post.Id,
                     // Each record gets its own copy of the tag list.
                     Tags              = new List<string>(post.Tags),
                     Slug              = post.Slug,
                     RegularTitle      = post.Summary,
                     RebloggedFromName = "",
                     ReblogKey         = post.ReblogKey,
                     UnixTimestamp     = post.Timestamp,
                     Submitter         = post.BlogName
                 };

                 if (hasMultipleParts)
                 {
                     index++;
                 }
                 DownloadMedia(content, data, index);
             }

             // The JSON entry is named after the post id with a ".json" extension.
             AddToJsonQueue(new TumblrCrawlerData<Datum>(Path.ChangeExtension(post.Id, ".json"), post));
         }
     }
     catch (TimeoutException timeoutException)
     {
         HandleTimeoutException(timeoutException, Resources.Crawling);
     }
     catch (Exception e)
     {
         // Previously swallowed silently; keep the best-effort behavior but leave a trace
         // (e.g. a null tag list or an out-of-range timestamp would otherwise vanish).
         Logger.Error("TumblrSearchCrawler.DownloadMedia: {0}", e);
     }
 }