/// <summary>
/// Processes one page of search API results: for every post that passes
/// <c>CheckIfWithinTimespan</c>, hands each content item to the per-content
/// <c>DownloadMedia</c> overload and queues the post itself for JSON output.
/// </summary>
/// <param name="page">The deserialized API page whose <c>Response.Posts.Data</c> is iterated.</param>
/// <remarks>
/// This method does not throw: timeouts are forwarded to
/// <c>HandleTimeoutException</c> and any other exception is logged. A failure
/// while processing one post ends processing of the remaining posts on the page.
/// </remarks>
private void DownloadMedia(TumblrSearchApi page)
{
    try
    {
        foreach (var post in page.Response.Posts.Data)
        {
            if (!CheckIfWithinTimespan(post.Timestamp))
            {
                continue;
            }

            foreach (var content in post.Content)
            {
                DownloadMedia(content, post.Id, post.Timestamp);
            }

            // The JSON entry is named after the post id, with a ".json" extension.
            AddToJsonQueue(new TumblrCrawlerData<Datum>(Path.ChangeExtension(post.Id, ".json"), post));
        }
    }
    catch (TimeoutException timeoutException)
    {
        HandleTimeoutException(timeoutException, Resources.Crawling);
    }
    catch (Exception e)
    {
        // Keep the best-effort behaviour (no rethrow), but leave a trace
        // instead of silently discarding the failure.
        Logger.Error("TumblrSearchCrawler.DownloadMedia: {0}", e);
    }
}
/// <summary>
/// Crawls the search results: loads the initial search page, processes the
/// posts embedded in it, then keeps requesting the API "next" link and
/// processing each returned page until no further link is present or
/// <c>CheckIfShouldStop</c> reports that crawling should end.
/// </summary>
/// <returns>A task that completes when crawling has finished or been stopped.</returns>
/// <remarks>
/// Timeouts are forwarded to <c>HandleTimeoutException</c>; any other exception
/// is logged. <c>semaphoreSlim</c> is released in every case.
/// </remarks>
private async Task CrawlPageAsync()
{
    try
    {
        string document = await GetSearchPageAsync();

        // The page state JSON is taken from the first capture group of the regex.
        string json = extractJsonFromSearch.Match(document).Groups[1].Value;
        SearchJson result = ConvertJsonToClassNew<SearchJson>(json);
        string bearerToken = result.ApiFetchStore.APITOKEN;

        // Process the first page BEFORE looking at its paging links: a search
        // that fits on a single page carries no "next" link, and dereferencing
        // it first would throw and skip the only page of results.
        DownloadMedia(result);

        var firstLinks = result.SearchRoute.SearchApiResponse.Response.Posts.Links;
        if (firstLinks == null || firstLinks.Next == null)
        {
            return;
        }

        string nextUrl = result.ApiUrl + firstLinks.Next.Href;

        while (true)
        {
            if (CheckIfShouldStop())
            {
                return;
            }

            CheckIfShouldPause();

            document = await GetRequestAsync(nextUrl, bearerToken);
            TumblrSearchApi apiresult = ConvertJsonToClassNew<TumblrSearchApi>(document);
            DownloadMedia(apiresult);

            // No links (or no "next" entry) means this was the last page.
            var links = apiresult.Response.Posts.Links;
            if (links == null || links.Next == null)
            {
                return;
            }

            nextUrl = result.ApiUrl + links.Next.Href;
        }
    }
    catch (TimeoutException timeoutException)
    {
        HandleTimeoutException(timeoutException, Resources.Crawling);
    }
    catch (Exception e)
    {
        Logger.Error("TumblrSearchCrawler.CrawlPageAsync: {0}", e);
    }
    finally
    {
        semaphoreSlim.Release();
    }
}
/// <summary>
/// Processes one page of search API results: for every post that passes
/// <c>CheckIfWithinTimespan</c>, builds a <c>Post</c> metadata object for each
/// content item, hands it to the per-content <c>DownloadMedia</c> overload,
/// and queues the post itself for JSON output.
/// </summary>
/// <param name="page">The deserialized API page whose <c>Response.Posts.Data</c> is iterated.</param>
/// <remarks>
/// This method does not throw: timeouts are forwarded to
/// <c>HandleTimeoutException</c> and any other exception is logged. A failure
/// while processing one post ends processing of the remaining posts on the page.
/// </remarks>
private void DownloadMedia(TumblrSearchApi page)
{
    try
    {
        foreach (var post in page.Response.Posts.Data)
        {
            if (!CheckIfWithinTimespan(post.Timestamp))
            {
                continue;
            }

            // Index passed along with each content item: stays at -1 when the
            // post has a single content item, otherwise counts 0, 1, 2, ...
            int index = -1;
            foreach (var content in post.Content)
            {
                Post data = new Post()
                {
                    Date = DateTimeOffset.FromUnixTimeSeconds(post.Timestamp).DateTime.ToString("yyyyMMddHHmmss"),
                    Type = ConvertContentTypeToPostType(content.Type),
                    Id = post.Id,
                    Tags = new List<string>(post.Tags),
                    Slug = post.Slug,
                    RegularTitle = post.Summary,
                    RebloggedFromName = "",
                    ReblogKey = post.ReblogKey,
                    UnixTimestamp = post.Timestamp,
                    Submitter = post.BlogName
                };

                index += (post.Content.Count > 1) ? 1 : 0;
                DownloadMedia(content, data, index);
            }

            // The JSON entry is named after the post id, with a ".json" extension.
            AddToJsonQueue(new TumblrCrawlerData<Datum>(Path.ChangeExtension(post.Id, ".json"), post));
        }
    }
    catch (TimeoutException timeoutException)
    {
        HandleTimeoutException(timeoutException, Resources.Crawling);
    }
    catch (Exception e)
    {
        // Keep the best-effort behaviour (no rethrow), but leave a trace
        // instead of silently discarding the failure.
        Logger.Error("TumblrSearchCrawler.DownloadMedia: {0}", e);
    }
}