/// <summary>
/// Processes every post on one page of tagged-search API results.
/// </summary>
/// <remarks>
/// Posts rejected by <c>CheckIfWithinTimespan</c> are skipped. For each remaining post, every
/// content element is handed to the per-content <c>DownloadMedia</c> overload together with the
/// post's id, timestamp and tags; afterwards the post itself is queued via <c>AddToJsonQueue</c>
/// under a file name built from the post id with a ".json" extension.
/// The whole page is processed inside a single try block, so the first exception ends
/// processing of the remaining posts on this page. Nothing is rethrown to the caller.
/// </remarks>
/// <param name="page">Deserialized API response whose timeline elements are processed.</param>
private void DownloadMedia(TumblrTaggedSearchApi page)
{
    try
    {
        foreach (var post in page.Response.Timeline.Elements)
        {
            if (!CheckIfWithinTimespan(post.Timestamp))
            {
                continue;
            }

            foreach (var content in post.Content)
            {
                DownloadMedia(content, post.Id, post.Timestamp, post.Tags);
            }

            AddToJsonQueue(new TumblrCrawlerData<Datum>(Path.ChangeExtension(post.Id, ".json"), post));
        }
    }
    catch (TimeoutException timeoutException)
    {
        HandleTimeoutException(timeoutException, Resources.Crawling);
    }
    catch (Exception e)
    {
        // Still best-effort (no rethrow), but record the failure instead of
        // swallowing it silently so broken pages can be diagnosed.
        Logger.Error("DownloadMedia: {0}", e);
    }
}
/// <summary>
/// Crawls a tagged search: loads the initial search page, processes its posts, then keeps
/// following the "next" link returned by the API until no further link is available or
/// <c>CheckIfShouldStop</c> reports that crawling should end.
/// </summary>
/// <remarks>
/// The initial page is fetched with <c>GetTaggedSearchPageAsync</c>; the JSON payload is taken
/// from capture group 1 of <c>extractJsonFromSearch</c>. If the parsed result asks for a
/// redirect, the page is fetched and parsed a second time with the argument <c>true</c>.
/// Follow-up pages are requested with <c>GetRequestAsync</c> using the API token from the
/// initial page.
/// Timeouts are forwarded to <c>HandleTimeoutException</c>, all other exceptions are logged,
/// and <c>semaphoreSlim</c> is released in every case.
/// </remarks>
/// <returns>A task that completes when crawling has finished or was stopped.</returns>
private async Task CrawlPageAsync()
{
    try
    {
        string document = await GetTaggedSearchPageAsync();
        string json = extractJsonFromSearch.Match(document).Groups[1].Value;
        TagSearch result = ConvertJsonToClass<TagSearch>(json);

        if (result.Tagged.ShouldRedirect)
        {
            document = await GetTaggedSearchPageAsync(true);
            json = extractJsonFromSearch.Match(document).Groups[1].Value;
            result = ConvertJsonToClass<TagSearch>(json);
        }

        string bearerToken = result.ApiFetchStore.APITOKEN;

        // Process the first page before looking at its paging links, so a search
        // that fits on a single page still gets its media handled.
        DownloadMedia(result);

        var firstLinks = result.Tagged.Timeline.Links;
        if (firstLinks == null || firstLinks.Next == null)
        {
            // No further pages: a normal end of the crawl, not an error.
            return;
        }

        string nextUrl = result.ApiUrl + firstLinks.Next.Href;

        while (true)
        {
            if (CheckIfShouldStop())
            {
                return;
            }

            CheckIfShouldPause();

            document = await GetRequestAsync(nextUrl, bearerToken);
            TumblrTaggedSearchApi apiresult = ConvertJsonToClass<TumblrTaggedSearchApi>(document);

            // A response without a next link ends the crawl. As before, such a
            // response is not processed; additionally guard against a Links
            // object that has no Next entry instead of dereferencing it.
            var links = apiresult.Response.Timeline.Links;
            if (links == null || links.Next == null)
            {
                return;
            }

            nextUrl = result.ApiUrl + links.Next.Href;
            DownloadMedia(apiresult);
        }
    }
    catch (TimeoutException timeoutException)
    {
        HandleTimeoutException(timeoutException, Resources.Crawling);
    }
    catch (Exception e)
    {
        Logger.Error("CrawlPageAsync: {0}", e);
    }
    finally
    {
        semaphoreSlim.Release();
    }
}
/// <summary>
/// Walks all posts of one tagged-search API page and hands each content element of a post
/// to the per-content <c>DownloadMedia</c> overload.
/// </summary>
/// <remarks>
/// Posts for which <c>CheckIfWithinTimespan</c> returns false are ignored. For every content
/// element a fresh <c>Post</c> is built from the post's metadata. The index passed along stays
/// at -1 for posts with a single content element and counts 0, 1, 2, ... otherwise. Once all
/// content elements were handled, the post is queued via <c>AddToJsonQueue</c> under a file
/// name built from the post id with a ".json" extension.
/// Failures while handling one post are caught per post (timeouts go to
/// <c>HandleTimeoutException</c>, everything else is logged), so the remaining posts are still
/// processed. Failures outside the per-post handling are logged and end the page.
/// </remarks>
/// <param name="page">Deserialized API response whose timeline elements are processed.</param>
private void DownloadMedia(TumblrTaggedSearchApi page)
{
    try
    {
        foreach (var entry in page.Response.Timeline.Elements)
        {
            if (CheckIfWithinTimespan(entry.Timestamp))
            {
                int contentIndex = -1;
                try
                {
                    foreach (var item in entry.Content)
                    {
                        // Same RFC 1123 string is used for both date fields.
                        string postDate = DateTimeOffset.FromUnixTimeSeconds(entry.Timestamp).DateTime.ToString("R");

                        Post postData = new Post()
                        {
                            Date = postDate,
                            DateGmt = postDate,
                            Type = ConvertContentTypeToPostType(item.Type),
                            Id = entry.Id,
                            Tags = new List<string>(entry.Tags),
                            Slug = entry.Slug,
                            RegularTitle = entry.Summary,
                            RebloggedFromName = "",
                            RebloggedRootName = "",
                            ReblogKey = entry.ReblogKey,
                            UnixTimestamp = entry.Timestamp,
                            Tumblelog = new TumbleLog2() { Name = entry.BlogName }
                        };

                        // Only posts with several content elements get a running index.
                        if (entry.Content.Count > 1)
                        {
                            contentIndex++;
                        }

                        DownloadMedia(item, postData, contentIndex);
                    }

                    string jsonFileName = Path.ChangeExtension(entry.Id, ".json");
                    AddToJsonQueue(new CrawlerData<Datum>(jsonFileName, entry));
                }
                catch (TimeoutException timeoutException)
                {
                    HandleTimeoutException(timeoutException, Resources.Crawling);
                }
                catch (Exception ex)
                {
                    // Original note on this handler: NullReferenceException
                    Logger.Error("TumblrTagSearchCrawler.DownloadMedia: {0}", ex);
                }
            }
        }
    }
    catch (Exception e)
    {
        Logger.Error("DownloadMedia: {0}", e);
    }
}