/// <summary>
/// Tries to take one post from <c>_posts</c>.
/// </summary>
/// <param name="post">
/// Receives the taken post; set to <c>null</c> when <c>IsCompleted</c> is true.
/// </param>
/// <returns>
/// <c>false</c> when <c>IsCompleted</c> is true; otherwise the result of
/// <c>_posts.TryTake</c>.
/// </returns>
public bool TryGetPost(out DataAcquirerPost post)
{
    if (!IsCompleted)
    {
        return _posts.TryTake(out post);
    }

    // Once completed, the collection is not consulted at all.
    post = null;
    return false;
}
/// <summary>
/// Maps a <see cref="Status"/> to a <see cref="DataAcquirerPost"/> whose source is "twitter".
/// </summary>
/// <param name="item">Status supplying the id, text, language, author and creation time.</param>
/// <param name="query">Query value forwarded unchanged to the created post.</param>
/// <returns>The post built by <c>DataAcquirerPost.FromValues</c>.</returns>
private static DataAcquirerPost FromStatus(Status item, string query)
{
    var originalPostId = item.StatusID.ToString();
    var text = item.FullText;
    var language = item.Lang;
    var author = item.User.ScreenNameResponse;

    // "s" is the sortable date/time format specifier.
    var createdAt = item.CreatedAt.ToString("s");

    return DataAcquirerPost.FromValues(
        originalPostId,
        text,
        language,
        "twitter",
        author,
        createdAt,
        query);
}
/// <summary>
/// Builds a <see cref="DataAcquirerPost"/> from this instance's current values.
/// </summary>
/// <returns>
/// The post built by <c>DataAcquirerPost.FromValues</c>. When the <c>DateTime</c> member
/// is <c>null</c>, the current local time formatted with "s" is used in its place.
/// </returns>
public DataAcquirerPost Freeze()
{
    var timestamp = DateTime;
    if (timestamp is null)
    {
        // System.DateTime is fully qualified because a member named DateTime is in scope here.
        timestamp = System.DateTime.Now.ToString("s");
    }

    return DataAcquirerPost.FromValues(
        OriginalPostId,
        Text,
        Language,
        Source,
        UserId,
        timestamp,
        Query);
}
/// <summary>
/// Maps a Reddit <see cref="Post"/> to a <see cref="DataAcquirerPost"/> whose source is
/// "reddit" and whose language is fixed to "en".
/// </summary>
/// <param name="r">Post supplying the listing data and the author.</param>
/// <param name="query">Query value forwarded unchanged to the created post.</param>
/// <returns>The post built by <c>DataAcquirerPost.FromValues</c>.</returns>
private DataAcquirerPost FromPost(Post r, string query)
{
    var listing = r.Listing;
    var originalPostId = listing.Id;

    // Only the self text is used as the post text; the title is not included.
    var text = listing.SelfText;

    // A missing author is replaced by the "n/a" placeholder.
    var author = r.Author ?? "n/a";
    var createdAt = listing.CreatedUTC.ToString("s");

    return DataAcquirerPost.FromValues(
        originalPostId,
        text,
        "en",
        "reddit",
        author,
        createdAt,
        query);
}
/// <summary>
/// Creates a synthetic post with source "random-data" and language "en".
/// </summary>
/// <param name="seed">Value passed to <c>GetRandomString</c> for both the text and the user.</param>
/// <returns>
/// A post whose id is "tw-" followed by a new GUID, stamped with the current local time
/// in "s" format.
/// </returns>
private DataAcquirerPost GetRandomPost(int seed)
{
    const string source = "random-data";

    // Text (length argument 100) is requested before the user (length argument 12).
    var text = GetRandomString(seed, 100);
    var user = GetRandomString(seed, 12);

    var createdAt = DateTime.Now.ToString("s");
    var originalPostId = $"tw-{Guid.NewGuid()}";

    return DataAcquirerPost.FromValues(
        originalPostId,
        text,
        "en",
        source,
        user,
        createdAt);
}
/// <summary>
/// Derives a <see cref="Guid"/> for a post from four 32-bit hash codes: the post text,
/// the original post id, the user id, and the post date-time combined with the job id.
/// </summary>
/// <param name="jobConfig">Job configuration; the hash code of its <c>JobId</c> is mixed into the date hash.</param>
/// <param name="dataPost">Post whose <c>Text</c>, <c>OriginalPostId</c>, <c>UserId</c> and <c>DateTime</c> are hashed.</param>
/// <returns>A GUID whose 16 bytes are the four hash codes laid out back to back.</returns>
/// <remarks>
/// NOTE(review): if the hashed members are strings, <c>string.GetHashCode()</c> is not
/// guaranteed to be stable across processes or runtime versions, so the resulting ids
/// may differ between runs - confirm whether callers rely on stable ids.
/// </remarks>
private static Guid CalculatePostId(DataAcquirerJobConfig jobConfig, DataAcquirerPost dataPost)
{
    var textHash = dataPost.Text.GetHashCode();
    var postIdHash = dataPost.OriginalPostId.GetHashCode();
    var userIdHash = dataPost.UserId.GetHashCode();
    var dateHash = dataPost.DateTime.GetHashCode();
    var jobHash = jobConfig.JobId.GetHashCode();

    // Wrap-around is intended when mixing hashes; make it explicit so a
    // checked compilation context cannot turn it into an OverflowException.
    var dateAndJobHash = unchecked(dateHash + jobHash);

    var hashes = new[] { textHash, postIdHash, userIdHash, dateAndJobHash };
    var bytes = new byte[hashes.Length * sizeof(int)];

    // Each hash occupies its own 4-byte slot (offsets 0, 4, 8, 12). The previous
    // offsets 0, 3, 7, 11 made the second write overwrite the last byte of the
    // text hash and left the final byte of the GUID always zero.
    for (var i = 0; i < hashes.Length; i++)
    {
        BitConverter.GetBytes(hashes[i]).CopyTo(bytes, i * sizeof(int));
    }

    return new Guid(bytes);
}
/// <summary>
/// Streams posts taken from <c>_postsEnumerator</c> in batches, waiting
/// <c>_downloadSimulatedDelay</c> before each batch is yielded.
/// </summary>
/// <param name="acquirerInputModel">Input model; its <c>BatchSize</c> is the number of posts taken per batch.</param>
/// <param name="cancellationToken">
/// Token that stops the stream. Cancellation cuts the current delay short; the batch
/// already taken is still yielded and the enumeration then ends without throwing.
/// </param>
/// <returns>An asynchronous sequence of posts that runs until cancellation is requested.</returns>
public async IAsyncEnumerable<DataAcquirerPost> GetPostsAsync(
    DataAcquirerInputModel acquirerInputModel,
    [EnumeratorCancellation] CancellationToken cancellationToken)
{
    // Previously the loop was `while (true)` and the delay used CancellationToken.None,
    // so the stream could never be stopped through the token.
    while (!cancellationToken.IsCancellationRequested)
    {
        var count = acquirerInputModel.BatchSize;
        var posts = _postsEnumerator
            .Take(count)
            .Select(post => DataAcquirerPost.FromValues(
                post.OriginalPostId,
                post.Text,
                post.Language,
                post.Source,
                post.UserId,
                post.PostDateTime))
            .ToList();

        try
        {
            await Task.Delay(_downloadSimulatedDelay, cancellationToken);
        }
        catch (TaskCanceledException)
        {
            // Cancellation only shortens the wait; the loop condition ends the stream.
        }

        foreach (var post in posts)
        {
            yield return post;
        }
    }
}
/// <summary>
/// Checks that a post has a non-empty <c>OriginalPostId</c> and a non-empty <c>DateTime</c>.
/// </summary>
/// <param name="post">Post to validate.</param>
/// <returns>
/// <c>OkErrorResult.Successful()</c> when both fields are present; otherwise
/// <c>OkErrorResult.Error</c> carrying one line per missing field, wrapped in the
/// <c>_validationErrorsFound</c> format.
/// </returns>
public static OkErrorResult ValidatePost(DataAcquirerPost post)
{
    IEnumerable<string> EnumerateErrors()
    {
        // Report the field's NAME. The value is known to be null or empty here, so
        // formatting the message with it (as before) produced a blank placeholder.
        if (string.IsNullOrEmpty(post.OriginalPostId))
        {
            yield return string.Format(_errorEmpty, nameof(post.OriginalPostId));
        }

        if (string.IsNullOrEmpty(post.DateTime))
        {
            yield return string.Format(_errorEmpty, nameof(post.DateTime));
        }
    }

    // Materialize once: the lazy iterator would otherwise run for the emptiness
    // check and then again for the join.
    var validationErrors = EnumerateErrors().ToList();
    if (validationErrors.Count == 0)
    {
        return OkErrorResult.Successful();
    }

    var errors = string.Join("\n", validationErrors);
    var errorMessage = string.Format(_validationErrorsFound, errors);
    return OkErrorResult.Error(errorMessage);
}
/// <summary>
/// Repeatedly pages through the results of <c>GetPosts</c> for the model's query and
/// streams each post with non-blank self text, followed by that post's non-blank top
/// comments, as <see cref="DataAcquirerPost"/> values. Waits ten minutes between passes.
/// </summary>
/// <param name="acquirerInputModel">Input model supplying the credentials and the query.</param>
/// <param name="cancellationToken">
/// Token that stops the stream. It is checked between passes and between pages, and it
/// interrupts the ten-minute wait; the enumeration then ends without throwing.
/// </param>
/// <returns>An asynchronous sequence of posts that runs until cancellation is requested.</returns>
public async IAsyncEnumerable<DataAcquirerPost> GetPostsAsync(
    DataAcquirerInputModel acquirerInputModel,
    [EnumeratorCancellation] CancellationToken cancellationToken)
{
    var credentials = ExtractCredentials(acquirerInputModel);
    var reddit = await _redditContextProvider.GetContextAsync(credentials);
    var query = acquirerInputModel.Query;
    var limit = 50;

    // Newest creation time seen in completed passes; items at or before it are
    // treated as already processed. Null during the first pass.
    DateTime? before = null;

    // Previously `while (true)` with an uncancellable delay, so the token was ignored.
    while (!cancellationToken.IsCancellationRequested)
    {
        var maxBefore = before;
        var count = 0;
        string after = null;
        var postListing = GetPosts(reddit, after, limit, query, count);
        var outDated = false;

        while (postListing.Count > 0 && !cancellationToken.IsCancellationRequested)
        {
            foreach (var item in postListing)
            {
                if (item.Created <= before)
                {
                    // Reached an item covered by an earlier pass; stop paging.
                    outDated = true;
                    break;
                }

                count++;
                maxBefore = Max(item.Created, maxBefore);

                if (!string.IsNullOrWhiteSpace(item.Listing.SelfText))
                {
                    yield return FromPost(item, query);
                }

                var comments = item.Comments.GetTop(100);
                foreach (var c in comments)
                {
                    if (string.IsNullOrWhiteSpace(c.Body))
                    {
                        continue;
                    }

                    // Comments carry the parent post's id and creation time;
                    // only the text and the author come from the comment.
                    var listingPost = item.Listing;
                    yield return DataAcquirerPost.FromValues(
                        listingPost.Id,
                        c.Body,
                        "en",
                        "reddit",
                        c.Author ?? "n/a",
                        listingPost.CreatedUTC.ToString("s"),
                        query);
                }
            }

            if (outDated)
            {
                break;
            }

            // The loop condition guarantees the page is non-empty, so Last() is safe.
            after = postListing.Last().Fullname;
            postListing = GetPosts(reddit, after, limit, query, count);
        }

        before = maxBefore;

        try
        {
            await Task.Delay(TimeSpan.FromMinutes(10), cancellationToken);
        }
        catch (TaskCanceledException)
        {
            // Cancellation only shortens the wait; the loop condition ends the stream.
        }
    }
}