private void BulkAll(string index, IEnumerable<SmallObject> documents, int size, int pages, int numberOfDocuments)
        {
            var handle    = new ManualResetEvent(false);
            var seenPages = 0;
            //first we set up our cold observable
            var observableBulk = this._client.BulkAll(documents, f => f
                                                      .MaxDegreeOfParallelism(8)
                                                      .BackOffTime(TimeSpan.FromSeconds(10))
                                                      .BackOffRetries(2)
                                                      .Size(size)
                                                      .RefreshOnCompleted()
                                                      .Index(index)
                                                      );
            //we set up an observer
            var bulkObserver = new BulkAllObserver(
                onError: (e) => { handle.Set(); throw e; },
                onCompleted: () => handle.Set(),
                onNext: (b) => Interlocked.Increment(ref seenPages)
                );

            //when we subscribe the observable becomes hot
            observableBulk.Subscribe(bulkObserver);

            handle.WaitOne(TimeSpan.FromMinutes(5));

            seenPages.Should().Be(pages);
            var count = this._client.Count <SmallObject>(f => f.Index(index));

            count.Count.Should().Be(numberOfDocuments);
            bulkObserver.TotalNumberOfFailedBuffers.Should().Be(0);
        }
Example 2
        private async Task<int> PushAllImpl(IEnumerable<IDictionary<string, string>> lazyEnumerable, string mappingName)
        {
            var seenPages = 0;
            var tcs       = new TaskCompletionSource <int>();

            // Build each bulk request: composite document ids from PartitionKey + RowKey,
            // routed to a time-based index derived from the batch's @timestamp.
            var observableBulk = _client.BulkAll(lazyEnumerable, bulkDescriptor => {
                bulkDescriptor
                .BufferToBulk((x, batch) => x.IndexMany(batch, (bd, d) => bd
                                                        .Id(d["PartitionKey"] + d["RowKey"]))
                              .Index(_indexNamer.BuildName(batch[0]["@timestamp"], mappingName)
                                     ))
                .Type(mappingName);

                if (_setPipeline)
                {
                    bulkDescriptor.Pipeline(mappingName.ToLower());
                }

                return(bulkDescriptor
                       .MaxDegreeOfParallelism(5)
                       .Size(_batchSize));
            });

            // Bridge the observer callbacks to a Task so the caller can await completion.
            var observer = new BulkAllObserver(
                onNext: (b) => Interlocked.Increment(ref seenPages),
                onCompleted: () => tcs.SetResult(seenPages),
                onError: e => tcs.SetException(e));

            observableBulk.Subscribe(observer);
            return(await tcs.Task.ConfigureAwait(false));
        }
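        /*
         * Hypothetical caller of the method above; `LogPushAllResult`, the `rows` argument and
         * the "requests" mapping name are assumptions, not part of the original example. Because
         * PushAllImpl bridges the BulkAllObserver callbacks to a Task with TaskCompletionSource,
         * the whole bulk operation can simply be awaited.
         */
        private async Task LogPushAllResult(IEnumerable<IDictionary<string, string>> rows)
        {
            var pagesIndexed = await PushAllImpl(rows, "requests").ConfigureAwait(false);
            Console.WriteLine($"BulkAll completed after {pagesIndexed} pages");
        }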
Example 3
        /// <summary>
        /// Bulk insert.
        /// </summary>
        /// <typeparam name="T">Document type.</typeparam>
        /// <param name="elasticClient">IElasticClient.</param>
        /// <param name="indexName">Index name.</param>
        /// <param name="list">The documents to index.</param>
        /// <returns>True on success; false on failure.</returns>
        public static bool BulkAll<T>(IElasticClient elasticClient, IndexName indexName, IEnumerable<T> list)
            where T : class
        {
            const int size           = 1000;
            var       tokenSource    = new CancellationTokenSource();
            var       observableBulk = elasticClient.BulkAll(list, f => f
                                                             .MaxDegreeOfParallelism(8)
                                                             .BackOffTime(TimeSpan.FromSeconds(10))
                                                             .BackOffRetries(2)
                                                             .Size(size)
                                                             .RefreshOnCompleted()
                                                             .Index(indexName)
                                                             .BufferToBulk((r, buffer) => r.IndexMany(buffer))
                                                             , tokenSource.Token);

            var countdownEvent = new CountdownEvent(1);

            Exception exception = null;

            void OnCompleted()
            {
                WriteLine("BulkAll Finished");
                countdownEvent.Signal();
            }

            var bulkAllObserver = new BulkAllObserver(
                onNext: response =>
            {
                WriteLine($"Indexed {response.Page * size} with {response.Retries} retries");
            },
                onError: ex =>
            {
                WriteLine("BulkAll Error : {0}", ex);
                exception = ex;
                countdownEvent.Signal();
            },
                onCompleted: OnCompleted);

            observableBulk.Subscribe(bulkAllObserver);

            countdownEvent.Wait(tokenSource.Token);

            if (exception != null)
            {
                WriteLine("BulkAll failed: {0}", exception);
                return(false);
            }
            else
            {
                return(true);
            }
        }
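        /*
         * Hypothetical call site for the helper above; the containing class name
         * `ElasticBulkHelper` and the `products` collection are assumptions.
         *
         *     var succeeded = ElasticBulkHelper.BulkAll(elasticClient, "products-v1", products);
         *     if (!succeeded)
         *     {
         *         // react to the failure captured by the observer, e.g. retry or alert
         *     }
         */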
Example 4
    public void CancelBulkAll()
    {
        var index  = CreateIndexName();
        var handle = new ManualResetEvent(false);

        var size              = 1000;
        var pages             = 1000;
        var seenPages         = 0;
        var numberOfDocuments = size * pages;
        var documents         = CreateLazyStreamOfDocuments(numberOfDocuments);

        //first we set up our cold observable
        var tokenSource    = new CancellationTokenSource();
        var observableBulk = Client.BulkAll(documents, f => f
                                            .MaxDegreeOfParallelism(8)
                                            .BackOffTime(TimeSpan.FromSeconds(10))
                                            .BackOffRetries(2)
                                            .Size(size)
                                            .RefreshOnCompleted()
                                            .Index(index)
                                            , tokenSource.Token);

        //we set up an observer
        Exception ex           = null;
        var       bulkObserver = new BulkAllObserver(
            onError: (e) => OnError(ref ex, e, handle),
            onNext: (b) => Interlocked.Increment(ref seenPages)
            );

        //when we subscribe the observable becomes hot
        observableBulk.Subscribe(bulkObserver);

        //we wait N seconds to see some bulks
        handle.WaitOne(TimeSpan.FromSeconds(3));
        tokenSource.Cancel();
        //we wait N seconds to give in flight request a chance to cancel
        handle.WaitOne(TimeSpan.FromSeconds(3));

        if (ex != null && ex is not OperationCanceledException)
        {
            throw ex;
        }

        seenPages.Should().BeLessThan(pages).And.BeGreaterThan(0);
        var count = Client.Count(new CountRequest(index));

        //var count = Client.Count<SmallObject>(f => f.Index(index));
        count.Count.Should().BeLessThan(numberOfDocuments).And.BeGreaterThan(0);
        bulkObserver.TotalNumberOfFailedBuffers.Should().Be(0);
        bulkObserver.TotalNumberOfRetries.Should().Be(0);
    }
Example 5
        public bool BulkAll<T>(string indexName, IEnumerable<T> docs) where T : class
        {
            const int size        = 1000;
            var       tokenSource = new CancellationTokenSource();

            var observableBulk = B2BElasticClient.BulkAll(docs, b => b
                                                          .Index(indexName)
                                                          .BackOffTime(TimeSpan.FromSeconds(10))
                                                          .BackOffRetries(2)
                                                          .RefreshOnCompleted()
                                                          .MaxDegreeOfParallelism(Environment.ProcessorCount)
                                                          .Size(size)
                                                          .BufferToBulk((r, buffer) => r.IndexMany(buffer)), tokenSource.Token
                                                          );
            var countdownEvent = new CountdownEvent(1);

            Exception exception = null;

            void OnCompleted()
            {
                Logger.Info("BulkAll Finished");
                countdownEvent.Signal();
            }

            var bulkAllObserver = new BulkAllObserver(
                onNext: response =>
            {
                Logger.Info($"Indexed {response.Page * size} with {response.Retries} retries");
            },
                onError: ex =>
            {
                Logger.Info("BulkAll Error : {0}", ex);
                exception = ex;
                countdownEvent.Signal();
            },
                OnCompleted);

            observableBulk.Subscribe(bulkAllObserver);

            countdownEvent.Wait(tokenSource.Token);

            if (exception != null)
            {
                Logger.Info("BulkHotelGeo Error : {0}", exception);
                return(false);
            }
            else
            {
                return(true);
            }
        }
        /**
         * The internal implementation of `BulkAllObservable` is asynchronous, using the
         * https://docs.microsoft.com/en-us/dotnet/standard/events/observer-design-pattern[Observer Design Pattern] to enable observers to
         * be registered to take action when each bulk response is returned, when an error has occurred, and when the `BulkAllObservable` has
         * finished. Whilst the internal implementation is asynchronous, you typically want to wait until all bulk indexing has finished before
         * continuing. The `Wait` method is a convenient shorthand to use for this, using a `ManualResetEvent` to block the current thread until
         * bulk indexing has finished or an error has occurred.
         *
         * ==== Advanced bulk indexing
         *
         * The `BulkAllObservable` helper exposes a number of methods to further control the process, such as
         *
         * * `BufferToBulk` to customize individual operations within the bulk request before it is dispatched to the server
         * * `RetryDocumentPredicate` to decide if a document that failed to be indexed should be retried
         * * `DroppedDocumentCallback` to determine what to do in the event a document is not indexed, even after retrying
         *
         * The following example demonstrates some of these methods, in addition to using a `BulkAllObserver` to subscribe to
         * the bulk indexing process and take some action on each successful bulk response, when an error occurs, and when
         * the process has finished.
         *
         * IMPORTANT: An observer such as `BulkAllObserver` should not throw exceptions from its interface implementations, such
         * as `OnNext` and `OnError`. Any exceptions thrown should be expected to go unhandled. In light of this, any exception
         * that occurs during the bulk indexing process should be captured and thrown outside of the observer, as demonstrated in the
         * example below. Take a look at the
         * https://docs.microsoft.com/en-us/dotnet/standard/events/observer-design-pattern-best-practices#handling-exceptions[Observer Design Pattern best practices]
         * on handling exceptions.
         */
        public void AdvancedBulkIndexing()
        {
            //hide
            var people = new Person[] { };

            var bulkAllObservable = client.BulkAll(people, b => b
                                                   .BufferToBulk((descriptor, buffer) => //<1> Customise each bulk operation before it is dispatched
            {
                foreach (var person in buffer)
                {
                    descriptor.Index <Person>(bi => bi
                                              .Index(person.Id % 2 == 0 ? "even-index" : "odd-index")             //<2> Index each document into either even-index or odd-index
                                              .Document(person)
                                              );
                }
            })
                                                   .RetryDocumentPredicate((bulkResponseItem, person) => //<3> Decide if a document should be retried in the event of a failure
            {
                return(bulkResponseItem.Error.Index == "even-index" && person.FirstName == "Martijn");
            })
                                                   .DroppedDocumentCallback((bulkResponseItem, person) => //<4> If a document cannot be indexed this delegate is called
            {
                Console.WriteLine($"Unable to index: {bulkResponseItem} {person}");
            }));

            var waitHandle = new ManualResetEvent(false);
            ExceptionDispatchInfo exceptionDispatchInfo = null;

            var observer = new BulkAllObserver(
                onNext: response =>
            {
                // do something e.g. write number of pages to console
            },
                onError: exception =>
            {
                exceptionDispatchInfo = ExceptionDispatchInfo.Capture(exception);
                waitHandle.Set();
            },
                onCompleted: () => waitHandle.Set());

            bulkAllObservable.Subscribe(observer); // <5> Subscribe to the observable, which will initiate the bulk indexing process

            waitHandle.WaitOne();                  // <6> Block the current thread until a signal is received

            exceptionDispatchInfo?.Throw();        // <7> If an exception was captured during the bulk indexing process, throw it
        }
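        /**
         * The `Wait` method mentioned in the documentation above is the simplest way to block
         * until the `BulkAllObservable` has finished. The following is a minimal sketch, not one
         * of the original examples; it assumes the same `client` field, a `people` collection and
         * NEST's `Wait` extension method on the observable.
         */
        public void WaitForBulkIndexing()
        {
            var people = new Person[] { };

            var observableBulk = client.BulkAll(people, b => b
                                                 .Index("people")
                                                 .BackOffTime(TimeSpan.FromSeconds(10))
                                                 .BackOffRetries(2)
                                                 .RefreshOnCompleted()
                                                 .Size(1000));

            // Wait blocks the calling thread (internally using a ManualResetEvent) until all bulk
            // requests have completed, an error has occurred, or the maximum run time elapses.
            observableBulk.Wait(TimeSpan.FromMinutes(15), response =>
            {
                // do something with each bulk response, e.g. write the page number to console
            });
        }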
        public void DisposingObservableCancelsBulkAll()
        {
            var index  = CreateIndexName();
            var handle = new ManualResetEvent(false);

            var size              = 1000;
            var pages             = 100;
            var seenPages         = 0;
            var numberOfDocuments = size * pages;
            var documents         = this.CreateLazyStreamOfDocuments(numberOfDocuments);

            //first we set up our cold observable
            var observableBulk = this._client.BulkAll(documents, f => f
                                                      .MaxDegreeOfParallelism(8)
                                                      .BackOffTime(TimeSpan.FromSeconds(10))
                                                      .BackOffRetries(2)
                                                      .Size(size)
                                                      .RefreshOnCompleted()
                                                      .Index(index)
                                                      );
            //we set up an observer
            var bulkObserver = new BulkAllObserver(
                onError: (e) => { throw e; },
                onCompleted: () => handle.Set(),
                onNext: (b) => Interlocked.Increment(ref seenPages)
                );

            //when we subscribe the observable becomes hot
            observableBulk.Subscribe(bulkObserver);

            //we wait N seconds to see some bulks
            handle.WaitOne(TimeSpan.FromSeconds(3));
            observableBulk.Dispose();
            //we wait N seconds to give in flight request a chance to cancel
            handle.WaitOne(TimeSpan.FromSeconds(3));

            seenPages.Should().BeLessThan(pages).And.BeGreaterThan(0);
            var count = this._client.Count <SmallObject>(f => f.Index(index));

            count.Count.Should().BeLessThan(numberOfDocuments).And.BeGreaterThan(0);
            bulkObserver.TotalNumberOfFailedBuffers.Should().Be(0);
            bulkObserver.TotalNumberOfRetries.Should().Be(0);
        }
Example 8
        internal void Index(ElasticSearchIndex index)
        {
            var elasticClient = GetClient();


            var observableBulkAll = elasticClient.BulkAll(index.Documents, b => b
                                                          .Index(index.Name)
                                                          .Type("entry")
                                                          .BackOffRetries(5)
                                                          .BackOffTime("30s")
                                                          .RefreshOnCompleted(true)
                                                          .MaxDegreeOfParallelism(4)
                                                          .Size(30000)
                                                          );

            var bulkAllObserver = new BulkAllObserver(
                onError: (e) => { throw e; },
                onCompleted: () => DoSomething(index.Name),
                onNext: (b) => DoSomethingElse((BulkAllResponse)b, index.Name)
                );

            observableBulkAll.Subscribe(bulkAllObserver);
        }
        /// <summary>
        /// Bulk sync to ES.
        /// </summary>
        /// <typeparam name="T">Document type</typeparam>
        /// <param name="addedOrUpdatedDocuments">The documents to synchronize</param>
        /// <returns>The IDs of the documents that could NOT be synced</returns>
        internal List<string> Execute<T>(List<T> addedOrUpdatedDocuments) where T : DocumentBase
        {
            var sw = new Stopwatch();

            sw.Start();
            Logger.Debug($"Got {addedOrUpdatedDocuments.Count} new/updated items!");

            var failedIds = new List <string>();

            var bulkAllObservable = _esClient.BulkAll(addedOrUpdatedDocuments, b => b.BufferToBulk((descriptor, buffer) =>
            {
                foreach (T document in buffer)
                {
                    if (document.Deleted)
                    {
                        descriptor.Delete <T>(doc => doc.Index(document.GetType().Name.ToLower()).Document(document));
                        Logger.Debug($"Item {document.Id} marked to be deleted!");
                    }
                    else
                    {
                        descriptor.Index <T>(doc => doc.Index(document.GetType().Name.ToLower()).Document(document));
                        Logger.Debug($"Item {document.Id} marked to be upserted!");
                    }
                }
            })
                                                      .DroppedDocumentCallback((bulkResponseItem, document) =>
            {
                Logger.Error($"Unable to index: {bulkResponseItem} {document}");
                failedIds.Add(document.Id);
            })
                                                      .BackOffTime("1s")                                                                                                                    //how long to wait between retries
                                                      .BackOffRetries(_appSettings.Get(BusinessConstants.ElasticSearchBulkSyncNoOfRetries, DefaultValues.ElasticSearchBulkSyncNoOfRetries)) //how many retries are attempted if a failure occurs
                                                      .RefreshOnCompleted()                                                                                                                 //refresh the index after bulk insert
                                                      .MaxDegreeOfParallelism(Environment.ProcessorCount)
                                                      .ContinueAfterDroppedDocuments(true)
                                                      .Size(_appSettings.Get(BusinessConstants.ElasticSearchSyncBatchSize, DefaultValues.ElasticSearchSyncBatchSize)));

            var waitHandle = new ManualResetEvent(false);
            ExceptionDispatchInfo exceptionDispatchInfo = null;

            var observer = new BulkAllObserver(
                onNext: response =>
            {
                Logger.Debug($"Written {response.Items.Count} in ES");
            },
                onError: exception =>
            {
                exceptionDispatchInfo = ExceptionDispatchInfo.Capture(exception);
                waitHandle.Set();
            },
                onCompleted: () => waitHandle.Set());

            bulkAllObservable.Subscribe(observer);       //Subscribe to the observable, which will initiate the bulk indexing process

            waitHandle.WaitOne(TimeSpan.FromMinutes(1)); //Block the current thread until a signal is received

            exceptionDispatchInfo?.Throw();              //If an exception was captured during the bulk indexing process, throw it

            sw.Stop();

            Logger.Debug("Finished in {ElapsedMilliseconds} ms", sw.ElapsedMilliseconds);

            return(failedIds);
        }
Example 10
        public static void Run(string endpoint, string username, string password, bool create = false)
        {
            var settings = new ConnectionSettings(new Uri(endpoint)).DefaultIndex("blogs").BasicAuthentication(username, password);
            var client   = new ElasticClient(settings);

            if (create)
            {
                var resp = client.Indices.Create("blogs", cid => cid
                                                 .Map <BlogIndexed>(m => m.AutoMap()
                                                                    .Properties(p => p.Keyword(kp => kp.Name(b => b.Author).Normalizer("lowercase")))
                                                                    .Properties(p => p.Text(tp => tp.Name(b => b.Title).Fields(f => f.Text(tf => tf.Analyzer("ngram_lc").Name("ngram_lc")))))
                                                                    .Properties(p => p.Keyword(tp => tp.Name(b => b.Tags).Fields(f => f.Text(tf => tf.Analyzer("ngram_lc").Name("ngram_lc"))))))
                                                 .Settings(i =>
                                                           i.Setting("max_ngram_diff", 30)
                                                           .Setting("max_result_window", 100000)
                                                           .Setting("max_rescore_window", 100000)
                                                           .Analysis(a =>
                                                                     a.Analyzers(ana =>
                                                                                 ana.Custom("ngram_lc", c => c.Filters("lowercase").Tokenizer("ngram_tokenizer")))
                                                                     .Tokenizers(t =>
                                                                                 t.NGram("ngram_tokenizer", n => n.MaxGram(30).MinGram(1).TokenChars(TokenChar.Letter, TokenChar.Digit)))
                                                                     .Normalizers(n => n.Custom("lowercase", cn => cn.Filters("lowercase"))))));
                if (!resp.IsValid)
                {
                    Console.WriteLine("error creating index");
                    return;
                }
            }
            BlogContextFactory blogContextFactory = new BlogContextFactory();

            using (var db = blogContextFactory.Create())
            {
                var totalBlogs = db.Blogs.Where(b => b.BlogID > 0).Count();
                Console.WriteLine($"total blogs: {totalBlogs}");
                int lastBlogId = LAST_BLOG_ID;
                for (int i = 0; i < totalBlogs; i += BATCH_SIZE)
                {
                    var blogs = db.Blogs.Where(b => b.BlogID > LAST_BLOG_ID).OrderBy(b => b.BlogID).Skip(i).Take(BATCH_SIZE)
                                .GroupJoin(db.Posts.Where(p => p.IdType == GmGard.Models.ItemType.Blog), b => b.BlogID, p => p.PostId, (b, p) => new { blog = b, post = p.Count() })
                                .GroupJoin(db.TagsInBlogs.DefaultIfEmpty(),
                                           b => b.blog.BlogID,
                                           tib => tib.BlogID,
                                           (b, tib) => new
                    {
                        b.blog,
                        tag = tib.Select(t => t.tag),
                        b.post,
                    }).ToList();
                    Console.WriteLine($"Send Items for {i} to {i + BATCH_SIZE - 1}");
                    var bulk = client.BulkAll(blogs.Select(b => new BlogIndexed
                    {
                        Id         = b.blog.BlogID,
                        Title      = b.blog.BlogTitle,
                        Content    = b.blog.Content,
                        Tags       = b.tag.Select(t => t.TagName),
                        CreateDate = b.blog.BlogDate,
                        CategoryId = b.blog.CategoryID,
                        Author     = b.blog.Author,
                        IsHarmony  = b.blog.isHarmony,
                        IsApproved = b.blog.isApproved,
                        BlogVisit  = b.blog.BlogVisit,
                        PostCount  = b.post,
                        Rating     = b.blog.Rating ?? 0,
                        ImagePath  = b.blog.ImagePath,
                        IsLocalImg = b.blog.IsLocalImg,
                    }), s => s
                                              // in case of 429 response, how long we should wait before retrying
                                              .BackOffTime(TimeSpan.FromSeconds(5))
                                              // in case of 429 response, how many times to retry before failing
                                              .BackOffRetries(5)
                                              .Index <BlogIndexed>());
                    var waitHandle      = new ManualResetEvent(false);
                    var bulkAllObserver = new BulkAllObserver(
                        onNext: bulkAllResponse =>
                    {
                        // do something after each bulk request
                        Console.WriteLine($"Done page {bulkAllResponse.Page} with retry {bulkAllResponse.Retries}");
                    },
                        onError: exception =>
                    {
                        waitHandle.Set();
                        throw exception;
                    },
                        onCompleted: () =>
                    {
                        // do something when all bulk operations complete
                        waitHandle.Set();
                    });
                    bulk.Subscribe(bulkAllObserver);
                    waitHandle.WaitOne();
                    if (blogs.Count > 0)
                    {
                        lastBlogId = blogs.Last().blog.BlogID;
                    }
                    if (blogs.Count < BATCH_SIZE)
                    {
                        break;
                    }
                }
                client.Indices.Refresh(Indices.Index("blogs"));
                Console.WriteLine($"last blogs: {lastBlogId}");
                Console.ReadLine();
            }
        }
Example 11
        public void IndexUsers(string usersPath, string badgesPath)
        {
            CreateUsersIndexIfNotExists();

            _client.Indices.UpdateSettings(UsersIndex, u => u
                                           .IndexSettings(i => i
                                                          .RefreshInterval("-1")
                                                          )
                                           );

            var size        = 1000;
            var seenPages   = 0;
            var indexedDocs = 0;
            var totalDocs   = 0;
            var handle      = new ManualResetEvent(false);

            var users          = StackOverflowData.GetUsers(usersPath);
            var observableBulk = _client.BulkAll(users, f => f
                                                 .MaxDegreeOfParallelism(16)
                                                 .BackOffTime(TimeSpan.FromSeconds(10))
                                                 .BackOffRetries(2)
                                                 .Size(size)
                                                 .RefreshOnCompleted()
                                                 .Index(UsersIndex)
                                                 );

            Exception exception    = null;
            var       bulkObserver = new BulkAllObserver(
                onError: e =>
            {
                exception = e;
                handle.Set();
            },
                onCompleted: () => handle.Set(),
                onNext: b =>
            {
                Interlocked.Add(ref indexedDocs, b.Items.Count(i => i.IsValid));
                Interlocked.Add(ref totalDocs, b.Items.Count);
                Interlocked.Increment(ref seenPages);
                Log.WriteLine($"indexed users page {seenPages}, {indexedDocs} out of {totalDocs}");
            }
                );

            var stopWatch = Stopwatch.StartNew();

            observableBulk.Subscribe(bulkObserver);
            handle.WaitOne();

            if (exception != null)
            {
                throw exception;
            }

            Log.WriteLine($"Time taken to index users: {stopWatch.Elapsed}");

            // update user badges
            seenPages   = 0;
            indexedDocs = 0;
            totalDocs   = 0;
            handle      = new ManualResetEvent(false);

            var badgeMetas = StackOverflowData.GetBadgeMetas(badgesPath);

            var observableBadgeBulk = _client.BulkAll(badgeMetas, f => f
                                                      .Index <User>()
                                                      .MaxDegreeOfParallelism(8)
                                                      .Size(size)
                                                      .BufferToBulk((bulk, badges) =>
            {
                foreach (var badge in badges)
                {
                    bulk.Update <User>(u => u
                                       .Script(s => s
                                               .Source(@"if (ctx._source.badges == null) { 
                                                    ctx._source.badges = [params.badge]; 
                                                } else if (ctx._source.badges.any(b -> b.name == params.badge.name) == false) { 
                                                    ctx._source.badges.add(params.badge); 
                                                }")
                                               .Params(d => d
                                                       .Add("badge", badge.Badge)
                                                       )
                                               )
                                       .Id(badge.UserId)
                                       .RetriesOnConflict(10)
                                       );
                }
            })
                                                      .RefreshOnCompleted()
                                                      );

            bulkObserver = new BulkAllObserver(
                onError: e =>
            {
                exception = e;
                handle.Set();
            },
                onCompleted: () => handle.Set(),
                onNext: b =>
            {
                Interlocked.Add(ref indexedDocs, b.Items.Count(i => i.IsValid));
                Interlocked.Add(ref totalDocs, b.Items.Count);
                Interlocked.Increment(ref seenPages);
                Log.WriteLine($"indexed badges page {seenPages}, {indexedDocs} out of {totalDocs}");
            }
                );

            stopWatch.Restart();
            observableBadgeBulk.Subscribe(bulkObserver);
            handle.WaitOne();

            if (exception != null)
            {
                throw exception;
            }

            Log.WriteLine($"Time taken to index badges: {stopWatch.Elapsed}");

            _client.Indices.UpdateSettings(UsersIndex, u => u
                                           .IndexSettings(i => i
                                                          .RefreshInterval("30s")
                                                          )
                                           );
        }
Example 12
        public void IndexPosts(string path)
        {
            CreatePostsIndexIfNotExists();

            _client.Indices.UpdateSettings(PostsIndex, u => u
                                           .IndexSettings(i => i
                                                          .RefreshInterval("-1")
                                                          )
                                           );

            var handle         = new ManualResetEvent(false);
            var size           = 1000;
            var posts          = StackOverflowData.GetPosts(path);
            var observableBulk = _client.BulkAll(posts, f => f
                                                 .MaxDegreeOfParallelism(Environment.ProcessorCount * 2)
                                                 .BackOffTime(TimeSpan.FromSeconds(10))
                                                 .BackOffRetries(2)
                                                 .Size(size)
                                                 .BufferToBulk((bulk, buffer) =>
            {
                foreach (var post in buffer)
                {
                    if (post is Question question)
                    {
                        var item = new BulkIndexOperation <Question>(question);
                        bulk.AddOperation(item);
                    }
                    else
                    {
                        var answer = (Answer)post;
                        var item   = new BulkIndexOperation <Answer>(answer);
                        bulk.AddOperation(item);
                    }
                }
            })
                                                 .RefreshOnCompleted()
                                                 .Index(PostsIndex)
                                                 );

            var seenPages   = 0;
            var indexedDocs = 0;
            var totalDocs   = 0;

            Exception exception    = null;
            var       bulkObserver = new BulkAllObserver(
                onError: e =>
            {
                exception = e;
                handle.Set();
            },
                onCompleted: () => handle.Set(),
                onNext: b =>
            {
                Interlocked.Add(ref indexedDocs, b.Items.Count(i => i.IsValid));
                Interlocked.Add(ref totalDocs, b.Items.Count);
                Interlocked.Increment(ref seenPages);
                Log.WriteLine($"indexed page {seenPages} of questions and answers, {indexedDocs} out of {totalDocs}");
            }
                );

            var stopWatch = Stopwatch.StartNew();

            observableBulk.Subscribe(bulkObserver);
            handle.WaitOne();

            if (exception != null)
            {
                throw exception;
            }

            Log.WriteLine($"time taken to index posts: {stopWatch.Elapsed}");

            _client.Indices.UpdateSettings(PostsIndex, u => u
                                           .IndexSettings(i => i
                                                          .RefreshInterval("30s")
                                                          )
                                           );
        }