Esempio n. 1
0
        /// <summary>
        /// Adds question tags to the answers in the posts index by running one update-by-query
        /// request per id/tags pair returned by <c>StackOverflowData.GetPostTagsWithAnswers</c>.
        /// </summary>
        /// <param name="path">Path handed to <c>StackOverflowData.GetPostTagsWithAnswers</c>.</param>
        /// <param name="size">Number of id/tags pairs per batch; the requests of one batch are started together and awaited as a group.</param>
        /// <exception cref="InvalidOperationException">The posts index does not exist.</exception>
        /// <exception cref="AggregateException">One or more requests of a batch faulted or were cancelled.</exception>
        public void UpdateAnswersWithQuestionTags(string path, int size)
        {
            if (!_client.Indices.Exists(PostsIndex).Exists)
            {
                throw new InvalidOperationException($"{PostsIndex} index does not exist. You must run the 'posts' command to index posts first");
            }

            var postIdsAndTags = StackOverflowData.GetPostTagsWithAnswers(path);
            long totalAnswersUpdated = 0;
            var totalQuestions = 0;
            var stopWatch = Stopwatch.StartNew();

            foreach (var batch in postIdsAndTags.Batch(size))
            {
                var tasks = batch.Select(b =>
                {
                    var (id, tags) = b;
                    return _client.UpdateByQueryAsync<Answer>(u => u
                        // Route by the parent id and match only documents whose parent is that id.
                        .Routing(id)
                        // Unary + is NEST shorthand for placing the query in a bool filter clause.
                        .Query(q => +q
                            .ParentId(p => p
                                .Id(id)
                                .Type<Answer>()
                            )
                        )
                        .Conflicts(Conflicts.Proceed)
                        .Index(PostsIndex)
                        .Timeout(TimeSpan.FromMinutes(1))
                        .WaitForCompletion()
                        // Only documents without tags are written; all others are turned into no-ops.
                        .Script(ss => ss
                            .Source(@"if (ctx._source.tags == null) { 
                                        ctx._source.tags = params.tags; 
                                    } else { 
                                        ctx.op = ""noop"";
                                    }")
                            .Params(p => p
                                .Add("tags", tags)
                            )
                        )
                    );
                }).ToArray();

                try
                {
                    // Wait() throws as soon as the combined task is faulted or cancelled, so a
                    // status check placed after it would never run; handle the failure here.
                    Task.WhenAll(tasks).Wait();
                }
                catch (AggregateException e)
                {
                    throw e.Flatten();
                }

                totalQuestions += tasks.Length;
                totalAnswersUpdated += tasks.Sum(t => t.Result.Updated);
                Log.WriteLine($"Updated {totalAnswersUpdated} answers for {totalQuestions} questions");
            }

            Log.WriteLine($"time taken to update answers: {stopWatch.Elapsed}");
        }
Esempio n. 2
0
        /// <summary>
        /// Bulk indexes the users read from <paramref name="usersPath"/> into the users index, then
        /// applies the badges read from <paramref name="badgesPath"/> to users through scripted updates.
        /// </summary>
        /// <param name="usersPath">Path handed to <c>StackOverflowData.GetUsers</c>.</param>
        /// <param name="badgesPath">Path handed to <c>StackOverflowData.GetBadgeMetas</c>.</param>
        /// <remarks>
        /// The index refresh interval is set to "-1" for the duration of the load and to "30s"
        /// afterwards, including when one of the bulk runs fails. The calling thread is blocked
        /// until each bulk run has completed or reported an error; an error reported by a run is
        /// rethrown on the calling thread.
        /// </remarks>
        public void IndexUsers(string usersPath, string badgesPath)
        {
            CreateUsersIndexIfNotExists();

            _client.Indices.UpdateSettings(UsersIndex, u => u
                .IndexSettings(i => i
                    .RefreshInterval("-1")
                )
            );

            const int size = 1000;

            try
            {
                var users = StackOverflowData.GetUsers(usersPath);
                var observableBulk = _client.BulkAll(users, f => f
                    .MaxDegreeOfParallelism(16)
                    .BackOffTime(TimeSpan.FromSeconds(10))
                    .BackOffRetries(2)
                    .Size(size)
                    .RefreshOnCompleted()
                    .Index(UsersIndex)
                );

                var stopWatch = Stopwatch.StartNew();
                RunToCompletion(observer => observableBulk.Subscribe(observer), "users");
                Log.WriteLine($"Time taken to index users: {stopWatch.Elapsed}");

                // update user badges
                var badgeMetas = StackOverflowData.GetBadgeMetas(badgesPath);
                var observableBadgeBulk = _client.BulkAll(badgeMetas, f => f
                    .Index<User>()
                    .MaxDegreeOfParallelism(8)
                    .Size(size)
                    .BufferToBulk((bulk, badges) =>
                    {
                        foreach (var badge in badges)
                        {
                            // Append the badge unless the user already has one with the same name.
                            bulk.Update<User>(u => u
                                .Script(s => s
                                    .Source(@"if (ctx._source.badges == null) { 
                                                    ctx._source.badges = [params.badge]; 
                                                } else if (ctx._source.badges.any(b -> b.name == params.badge.name) == false) { 
                                                    ctx._source.badges.add(params.badge); 
                                                }")
                                    .Params(d => d
                                        .Add("badge", badge.Badge)
                                    )
                                )
                                .Id(badge.UserId)
                                .RetriesOnConflict(10)
                            );
                        }
                    })
                    .RefreshOnCompleted()
                );

                stopWatch.Restart();
                RunToCompletion(observer => observableBadgeBulk.Subscribe(observer), "badges");
                Log.WriteLine($"Time taken to index badges: {stopWatch.Elapsed}");
            }
            finally
            {
                // Restore the refresh interval even when a bulk run failed, so the index is not
                // left with the "-1" setting applied above.
                _client.Indices.UpdateSettings(UsersIndex, u => u
                    .IndexSettings(i => i
                        .RefreshInterval("30s")
                    )
                );
            }

            // Subscribes a progress-logging observer through the given callback, blocks until the
            // run signals completion or an error, and rethrows a reported error on this thread.
            // Every call owns its wait handle, counters and error slot, so a callback from one
            // run cannot touch the state of another run.
            void RunToCompletion(Action<BulkAllObserver> subscribe, string label)
            {
                var seenPages = 0;
                var indexedDocs = 0;
                var totalDocs = 0;
                Exception exception = null;
                var handle = new ManualResetEvent(false);

                var bulkObserver = new BulkAllObserver(
                    onError: e =>
                    {
                        exception = e;
                        handle.Set();
                    },
                    onCompleted: () => handle.Set(),
                    onNext: b =>
                    {
                        // Log the values returned by the atomic updates instead of re-reading the
                        // shared counters, which other callbacks may be changing concurrently.
                        var indexed = Interlocked.Add(ref indexedDocs, b.Items.Count(i => i.IsValid));
                        var total = Interlocked.Add(ref totalDocs, b.Items.Count);
                        var page = Interlocked.Increment(ref seenPages);
                        Log.WriteLine($"indexed {label} page {page}, {indexed} out of {total}");
                    }
                );

                subscribe(bulkObserver);
                handle.WaitOne();

                if (exception != null)
                {
                    // Rethrow the same exception object without discarding its original stack trace.
                    System.Runtime.ExceptionServices.ExceptionDispatchInfo.Capture(exception).Throw();
                }
            }
        }
Esempio n. 3
0
        /// <summary>
        /// Bulk indexes the posts read from <paramref name="path"/> into the posts index, writing
        /// each post as either a <c>Question</c> or an <c>Answer</c> index operation.
        /// </summary>
        /// <param name="path">Path handed to <c>StackOverflowData.GetPosts</c>.</param>
        /// <remarks>
        /// The index refresh interval is set to "-1" for the duration of the load and to "30s"
        /// afterwards, including when the bulk run fails. The calling thread is blocked until the
        /// run has completed or reported an error; a reported error is rethrown on the calling thread.
        /// </remarks>
        public void IndexPosts(string path)
        {
            CreatePostsIndexIfNotExists();

            _client.Indices.UpdateSettings(PostsIndex, u => u
                .IndexSettings(i => i
                    .RefreshInterval("-1")
                )
            );

            try
            {
                const int size = 1000;
                var handle = new ManualResetEvent(false);
                var posts = StackOverflowData.GetPosts(path);
                var observableBulk = _client.BulkAll(posts, f => f
                    .MaxDegreeOfParallelism(Environment.ProcessorCount * 2)
                    .BackOffTime(TimeSpan.FromSeconds(10))
                    .BackOffRetries(2)
                    .Size(size)
                    .BufferToBulk((bulk, buffer) =>
                    {
                        foreach (var post in buffer)
                        {
                            // Use the concrete post type for the operation; anything that is not
                            // a Question is cast to Answer.
                            if (post is Question question)
                            {
                                bulk.AddOperation(new BulkIndexOperation<Question>(question));
                            }
                            else
                            {
                                bulk.AddOperation(new BulkIndexOperation<Answer>((Answer)post));
                            }
                        }
                    })
                    .RefreshOnCompleted()
                    .Index(PostsIndex)
                );

                var seenPages = 0;
                var indexedDocs = 0;
                var totalDocs = 0;
                Exception exception = null;

                var bulkObserver = new BulkAllObserver(
                    onError: e =>
                    {
                        exception = e;
                        handle.Set();
                    },
                    onCompleted: () => handle.Set(),
                    onNext: b =>
                    {
                        // Log the values returned by the atomic updates instead of re-reading the
                        // shared counters, which other callbacks may be changing concurrently.
                        var indexed = Interlocked.Add(ref indexedDocs, b.Items.Count(i => i.IsValid));
                        var total = Interlocked.Add(ref totalDocs, b.Items.Count);
                        var page = Interlocked.Increment(ref seenPages);
                        Log.WriteLine($"indexed page {page} of questions and answers, {indexed} out of {total}");
                    }
                );

                var stopWatch = Stopwatch.StartNew();

                observableBulk.Subscribe(bulkObserver);
                handle.WaitOne();

                if (exception != null)
                {
                    // Rethrow the same exception object without discarding its original stack trace.
                    System.Runtime.ExceptionServices.ExceptionDispatchInfo.Capture(exception).Throw();
                }

                Log.WriteLine($"time taken to index posts: {stopWatch.Elapsed}");
            }
            finally
            {
                // Restore the refresh interval even when the bulk run failed, so the index is not
                // left with the "-1" setting applied above.
                _client.Indices.UpdateSettings(PostsIndex, u => u
                    .IndexSettings(i => i
                        .RefreshInterval("30s")
                    )
                );
            }
        }