Exemplo n.º 1
0
 /// <summary>
 /// Attempts to take one post from the internal collection.
 /// </summary>
 /// <param name="post">
 /// Receives the taken post; set to <c>null</c> when <c>IsCompleted</c> is true.
 /// </param>
 /// <returns>
 /// <c>false</c> when <c>IsCompleted</c> is true; otherwise the result of
 /// <c>_posts.TryTake</c>.
 /// </returns>
 public bool TryGetPost(out DataAcquirerPost post)
 {
     if (!IsCompleted)
     {
         return _posts.TryTake(out post);
     }

     // Completed: nothing is handed out any more.
     post = null;
     return false;
 }
Exemplo n.º 2
0
 /// <summary>
 /// Maps a <see cref="Status"/> to a <see cref="DataAcquirerPost"/> whose source is "twitter".
 /// </summary>
 /// <param name="item">The status to convert.</param>
 /// <param name="query">The query string forwarded unchanged to the resulting post.</param>
 /// <returns>The post created by <c>DataAcquirerPost.FromValues</c>.</returns>
 private static DataAcquirerPost FromStatus(Status item, string query)
 {
     var postId    = item.StatusID.ToString();
     var text      = item.FullText;
     var language  = item.Lang;
     var author    = item.User.ScreenNameResponse;
     // "s" is the sortable date/time format (yyyy-MM-ddTHH:mm:ss).
     var createdAt = item.CreatedAt.ToString("s");

     return DataAcquirerPost.FromValues(postId, text, language, "twitter", author, createdAt, query);
 }
Exemplo n.º 3
0
        /// <summary>
        /// Creates a <see cref="DataAcquirerPost"/> from the current member values.
        /// </summary>
        /// <returns>
        /// The post created by <c>DataAcquirerPost.FromValues</c>. When <c>DateTime</c> is
        /// <c>null</c>, the current local time in the sortable "s" format is used instead.
        /// </returns>
        public DataAcquirerPost Freeze()
        {
            var timestamp = DateTime;
            if (timestamp == null)
            {
                // No timestamp was provided; fall back to "now".
                timestamp = System.DateTime.Now.ToString("s");
            }

            return DataAcquirerPost.FromValues(OriginalPostId, Text, Language, Source, UserId, timestamp, Query);
        }
Exemplo n.º 4
0
        /// <summary>
        /// Maps a <see cref="Post"/> to a <see cref="DataAcquirerPost"/> with language "en"
        /// and source "reddit".
        /// </summary>
        /// <param name="r">The post to convert.</param>
        /// <param name="query">The query string forwarded unchanged to the resulting post.</param>
        /// <returns>The post created by <c>DataAcquirerPost.FromValues</c>.</returns>
        private DataAcquirerPost FromPost(Post r, string query)
        {
            var listing = r.Listing;

            var postId    = listing.Id;
            // Only the self text is used as the post text; the title is not included.
            var text      = listing.SelfText;
            var author    = r.Author ?? "n/a";
            var createdAt = listing.CreatedUTC.ToString("s");

            return DataAcquirerPost.FromValues(postId, text, "en", "reddit", author, createdAt, query);
        }
Exemplo n.º 5
0
        /// <summary>
        /// Builds a synthetic <see cref="DataAcquirerPost"/> with source "random-data" and language "en".
        /// </summary>
        /// <param name="seed">Value passed to <c>GetRandomString</c> for both the text and the user name.</param>
        /// <returns>
        /// A post whose id is "tw-" followed by a new GUID, with a 100-length text argument and a
        /// 12-length user argument requested from <c>GetRandomString</c>, stamped with the current local time.
        /// </returns>
        private DataAcquirerPost GetRandomPost(int seed)
        {
            // Text is generated before the user name, in case the generator is stateful.
            var text      = GetRandomString(seed, 100);
            var user      = GetRandomString(seed, 12);
            var timestamp = DateTime.Now.ToString("s");
            var postId    = $"tw-{Guid.NewGuid()}";

            return DataAcquirerPost.FromValues(postId, text, "en", "random-data", user, timestamp);
        }
Exemplo n.º 6
0
        /// <summary>
        /// Derives a <see cref="Guid"/> for a post by packing four 32-bit hash codes into 16 bytes:
        /// the post text, the original post id, the user id, and the post date/time combined
        /// with the job id.
        /// </summary>
        /// <param name="jobConfig">Job configuration; the hash of its <c>JobId</c> is mixed into the last segment.</param>
        /// <param name="dataPost">The post whose <c>Text</c>, <c>OriginalPostId</c>, <c>UserId</c> and <c>DateTime</c> are hashed.</param>
        /// <returns>A GUID built from the four hash codes, each occupying its own 4-byte segment.</returns>
        /// <remarks>
        /// NOTE(review): the result is only as stable as the underlying <c>GetHashCode</c>
        /// implementations. If the hashed members are strings, their hash codes are not guaranteed
        /// to be equal across processes or runtime versions - confirm before persisting these ids.
        /// </remarks>
        private static Guid CalculatePostId(DataAcquirerJobConfig jobConfig, DataAcquirerPost dataPost)
        {
            const int hashSize = sizeof(int);

            var textHash   = dataPost.Text.GetHashCode();
            var postIdHash = dataPost.OriginalPostId.GetHashCode();
            var userIdHash = dataPost.UserId.GetHashCode();
            var dateIdHash = dataPost.DateTime.GetHashCode();
            var jobIdHash  = jobConfig.JobId.GetHashCode();

            // Wrap-around on overflow is intended; make it explicit so a checked build cannot throw.
            var dateAndJobHash = unchecked(dateIdHash + jobIdHash);

            // Each hash gets its own non-overlapping 4-byte slot. The previous offsets (0, 3, 7, 11)
            // let every write clobber the last byte of the preceding hash and left byte 15 unused.
            var bytes = new byte[4 * hashSize];
            BitConverter.GetBytes(textHash).CopyTo(bytes, 0 * hashSize);
            BitConverter.GetBytes(postIdHash).CopyTo(bytes, 1 * hashSize);
            BitConverter.GetBytes(userIdHash).CopyTo(bytes, 2 * hashSize);
            BitConverter.GetBytes(dateAndJobHash).CopyTo(bytes, 3 * hashSize);

            return new Guid(bytes);
        }
Exemplo n.º 7
0
        /// <summary>
        /// Streams posts taken from <c>_postsEnumerator</c> in batches, waiting
        /// <c>_downloadSimulatedDelay</c> before each batch is yielded.
        /// </summary>
        /// <param name="acquirerInputModel">Supplies <c>BatchSize</c>, the number of posts taken per batch.</param>
        /// <param name="cancellationToken">
        /// Ends the stream when cancelled: the pending delay is aborted and the enumeration
        /// completes without throwing.
        /// </param>
        /// <returns>An asynchronous sequence of posts that runs until cancellation is requested.</returns>
        public async IAsyncEnumerable<DataAcquirerPost> GetPostsAsync(
            DataAcquirerInputModel acquirerInputModel,
            [EnumeratorCancellation] CancellationToken cancellationToken)
        {
            while (!cancellationToken.IsCancellationRequested)
            {
                var count = acquirerInputModel.BatchSize;

                var posts = _postsEnumerator
                            .Take(count)
                            .Select(post =>
                                    DataAcquirerPost.FromValues(
                                        post.OriginalPostId,
                                        post.Text,
                                        post.Language,
                                        post.Source,
                                        post.UserId,
                                        post.PostDateTime))
                            .ToList();

                try
                {
                    // Pass the caller's token so the simulated download delay can be aborted.
                    await Task.Delay(_downloadSimulatedDelay, cancellationToken);
                }
                catch (TaskCanceledException)
                {
                    // Cancellation is an expected way to stop; handled right below.
                }

                if (cancellationToken.IsCancellationRequested)
                {
                    yield break;
                }

                foreach (var post in posts)
                {
                    yield return post;
                }
            }
        }
Exemplo n.º 8
0
        /// <summary>
        /// Checks that the required fields of a post are present.
        /// </summary>
        /// <param name="post">The post to validate; <c>OriginalPostId</c> and <c>DateTime</c> must be non-empty.</param>
        /// <returns>
        /// <c>OkErrorResult.Successful()</c> when both fields are set; otherwise
        /// <c>OkErrorResult.Error</c> with one line per missing field, wrapped in
        /// <c>_validationErrorsFound</c>.
        /// </returns>
        public static OkErrorResult ValidatePost(DataAcquirerPost post)
        {
            // Collected eagerly so the checks run exactly once.
            var validationErrors = new List<string>();

            // Report the field NAME: the value is null/empty at this point, so formatting the
            // value (as was done before) produced a message that did not identify the field.
            // NOTE(review): assumes _errorEmpty has a single {0} placeholder for the field name.
            if (string.IsNullOrEmpty(post.OriginalPostId))
            {
                validationErrors.Add(string.Format(_errorEmpty, nameof(post.OriginalPostId)));
            }
            if (string.IsNullOrEmpty(post.DateTime))
            {
                validationErrors.Add(string.Format(_errorEmpty, nameof(post.DateTime)));
            }

            if (validationErrors.Count == 0)
            {
                return OkErrorResult.Successful();
            }

            var errors       = string.Join("\n", validationErrors);
            var errorMessage = string.Format(_validationErrorsFound, errors);
            return OkErrorResult.Error(errorMessage);
        }
Exemplo n.º 9
0
        /// <summary>
        /// Continuously polls Reddit for posts matching the query and streams each post's self
        /// text and its comments as <see cref="DataAcquirerPost"/> items.
        /// </summary>
        /// <remarks>
        /// Each polling pass pages through the results (<c>limit</c> items per request) until a page
        /// is empty or an item created at or before the newest item of the previous pass is reached.
        /// The method then waits ten minutes before the next pass. Comment items carry the id and
        /// creation time of their parent post.
        /// </remarks>
        /// <param name="acquirerInputModel">Supplies the credentials and the <c>Query</c> to search for.</param>
        /// <param name="cancellationToken">
        /// Ends the stream when cancelled: paging stops, the wait between passes is aborted and the
        /// enumeration completes without throwing.
        /// </param>
        /// <returns>An asynchronous sequence of posts that runs until cancellation is requested.</returns>
        public async IAsyncEnumerable<DataAcquirerPost> GetPostsAsync(
            DataAcquirerInputModel acquirerInputModel,
            [EnumeratorCancellation] CancellationToken cancellationToken)
        {
            var credentials = ExtractCredentials(acquirerInputModel);
            var reddit      = await _redditContextProvider.GetContextAsync(credentials);

            var query = acquirerInputModel.Query;

            var       limit  = 50;
            DateTime? before = null;

            while (!cancellationToken.IsCancellationRequested)
            {
                var    maxBefore   = before;
                var    count       = 0;
                string after       = null;
                var    postListing = GetPosts(reddit, after, limit, query, count);
                var    outDated    = false;

                while (postListing.Count > 0 && !cancellationToken.IsCancellationRequested)
                {
                    foreach (var item in postListing)
                    {
                        // While 'before' is null (first pass) this comparison is false,
                        // so nothing is treated as outdated.
                        if (item.Created <= before)
                        {
                            outDated = true;
                            break;
                        }
                        count++;
                        maxBefore = Max(item.Created, maxBefore);

                        if (!string.IsNullOrWhiteSpace(item.Listing.SelfText))
                        {
                            yield return FromPost(item, query);
                        }

                        var comments = item.Comments.GetTop(100);
                        foreach (var c in comments)
                        {
                            if (string.IsNullOrWhiteSpace(c.Body))
                            {
                                continue;
                            }
                            var listingPost = item.Listing;
                            yield return DataAcquirerPost.FromValues(
                                listingPost.Id,
                                c.Body,
                                "en",
                                "reddit",
                                c.Author ?? "n/a",
                                listingPost.CreatedUTC.ToString("s"),
                                query);
                        }
                    }

                    if (outDated)
                    {
                        break;
                    }

                    // The page is non-empty here (loop condition), so Last() is safe.
                    after = postListing.Last().Fullname;

                    postListing = GetPosts(reddit, after, limit, query, count);
                }
                before = maxBefore;

                try
                {
                    // Pass the caller's token so the wait between passes can be aborted.
                    await Task.Delay(TimeSpan.FromMinutes(10), cancellationToken);
                }
                catch (TaskCanceledException)
                {
                    // Cancellation is an expected way to stop; the loop condition ends the stream.
                }
            }
        }