/**
         * ==== Multiple documents with `BulkAllObservable` helper
         *
         * Using the `BulkAllObservable` helper allows you to focus on the overall objective of indexing, without having to
         * concern yourself with retry, backoff or chunking mechanics.
         * Multiple documents can be indexed using the `BulkAll` method and `Wait()` extension method.
         *
         * This helper exposes functionality to automatically retry / backoff in the event of an indexing failure,
         * and to control the number of documents indexed in a single HTTP request. In the example below each request will contain 1000 documents,
         * chunked from the original input. In the event of a large number of documents this could result in many HTTP requests, each containing
         * 1000 documents (the last request may contain fewer, depending on the total number).
         *
         * The helper will also lazily enumerate an `IEnumerable<T>` collection, allowing you to index a large number of documents easily.
         */
        public async Task BulkDocumentsWithObservableHelper()
        {
            // hide
            var persons = new[]
            {
                new Person { Id = 1, FirstName = "Martijn", LastName = "Laarman" },
                new Person { Id = 2, FirstName = "Stuart", LastName = "Cam" },
                new Person { Id = 3, FirstName = "Russ", LastName = "Cam" }
                // snip
            };

            var waitResult = client
                .BulkAll(persons, descriptor => descriptor
                    .Index("people")
                    .BackOffTime("30s") //<1> how long to wait between retries
                    .BackOffRetries(2) //<2> how many retries are attempted if a failure occurs
                    .RefreshOnCompleted()
                    .MaxDegreeOfParallelism(Environment.ProcessorCount)
                    .Size(1000) // <3> items per bulk request
                )
                .Wait(TimeSpan.FromMinutes(15), response => //<4> perform the indexing and wait up to 15 minutes, whilst the BulkAll calls are asynchronous this is a blocking operation
                {
                    // do something e.g. write number of pages to console
                });
        }
 /// <summary>
 /// Bulk-indexes <paramref name="logEvents"/> into the index named by <c>IndexName</c>,
 /// 1000 items per request, and blocks for up to 15 minutes while the work runs.
 /// </summary>
 /// <param name="logEvents">The log events to index.</param>
 public void BulkAdd(List<LogEvent> logEvents)
 {
     var maximumRunTime = TimeSpan.FromMinutes(15);

     _client
         .BulkAll(logEvents, descriptor => descriptor
             .Index(IndexName)
             .BackOffTime("30s")
             .BackOffRetries(2)
             .RefreshOnCompleted()
             .MaxDegreeOfParallelism(Environment.ProcessorCount)
             .Size(1000))
         // Per-response callback is intentionally a no-op.
         .Wait(maximumRunTime, response => { });
 }
示例#3
0
        /// <summary>
        /// Bulk-indexes <paramref name="docs"/> into <paramref name="indexName"/> in batches of 1000 and blocks
        /// the calling thread until the bulk observable reports completion or an error.
        /// </summary>
        /// <typeparam name="T">The document type.</typeparam>
        /// <param name="indexName">Name of the target index.</param>
        /// <param name="docs">Documents to index.</param>
        /// <returns>
        /// <c>true</c> when the observable completed; <c>false</c> when it reported an error (the error is logged,
        /// not thrown).
        /// </returns>
        public bool BulkAll<T>(string indexName, IEnumerable<T> docs) where T : class
        {
            const int size = 1000;

            Exception exception = null;

            // Both objects are IDisposable; release them deterministically once the wait is over
            // instead of leaving them for the finalizer.
            using (var tokenSource = new CancellationTokenSource())
            using (var countdownEvent = new CountdownEvent(1))
            {
                var observableBulk = B2BElasticClient.BulkAll(docs, b => b
                    .Index(indexName)
                    .BackOffTime(TimeSpan.FromSeconds(10))
                    .BackOffRetries(2)
                    .RefreshOnCompleted()
                    .MaxDegreeOfParallelism(Environment.ProcessorCount)
                    .Size(size)
                    .BufferToBulk((r, buffer) => r.IndexMany(buffer)), tokenSource.Token);

                void OnCompleted()
                {
                    Logger.Info("BulkAll Finished");
                    countdownEvent.Signal();
                }

                var bulkAllObserver = new BulkAllObserver(
                    onNext: response =>
                    {
                        Logger.Info($"Indexed {response.Page * size} with {response.Retries} retries");
                    },
                    onError: ex =>
                    {
                        Logger.Info("BulkAll Error : {0}", ex);
                        // Remember the failure so it can be reported from the calling thread.
                        exception = ex;
                        countdownEvent.Signal();
                    },
                    onCompleted: OnCompleted);

                observableBulk.Subscribe(bulkAllObserver);

                // Block until either onError or onCompleted has signalled.
                countdownEvent.Wait(tokenSource.Token);
            }

            if (exception != null)
            {
                Logger.Info("BulkHotelGeo Error : {0}", exception);
            }

            return exception == null;
        }
        /// <summary>
        /// Bulk-indexes <paramref name="elementList"/> into <paramref name="indexName"/> in batches of 1000 and
        /// blocks until the bulk observable completes or reports an error.
        /// </summary>
        /// <typeparam name="T">The document type.</typeparam>
        /// <param name="elementList">Documents to index.</param>
        /// <param name="indexName">Name of the target index.</param>
        /// <remarks>
        /// Errors are logged through <c>_logger</c> and are not rethrown; the method returns normally in
        /// both the success and the failure case.
        /// </remarks>
        public void BulkAllGeneric<T>(List<T> elementList, string indexName)
            where T : class
        {
            // Single source of truth for the batch size used both by the request and the progress counter.
            const int batchSize = 1000;

            // List<T>.Count is a property; no need to go through LINQ's Count().
            var elementCount = elementList.Count;
            var count = 0;

            using (var waitHandle = new CountdownEvent(1))
            {
                var bulkAll = _client.BulkAll(elementList, b => b
                    .Index(indexName)
                    .BackOffRetries(5)
                    .BackOffTime(TimeSpan.FromSeconds(15))
                    .RefreshOnCompleted(true)
                    .MaxDegreeOfParallelism(2)
                    .BulkResponseCallback(c =>
                    {
                        if (!c.IsValid)
                        {
                            _logger.Debug($"Bulk Response was invalid: {c.DebugInformation}");
                        }
                    })
                    .Size(batchSize));

                bulkAll.Subscribe(observer: new BulkAllObserver(
                    onNext: b =>
                    {
                        // With a degree of parallelism above 1 this callback may run concurrently,
                        // so advance the counter atomically and clamp only the value being reported.
                        var indexed = Math.Min(Interlocked.Add(ref count, batchSize), elementCount);

                        _logger.Debug($"Indexed group of {typeof(T)}: {indexed} of {elementCount}");
                    },
                    onError: e =>
                    {
                        _logger.Error(e, e.Message);
                        waitHandle.Signal();
                    },
                    onCompleted: () =>
                    {
                        waitHandle.Signal();
                    }));

                // Block until onError or onCompleted has signalled.
                waitHandle.Wait();
            }
        }
示例#5
0
        /// <summary>
        /// Bulk-indexes <paramref name="documents"/> into <paramref name="indexName"/> in batches of
        /// <paramref name="batchSize"/>, blocks until the bulk observable completes or fails, and then logs the
        /// elapsed time via <c>SendLog</c>.
        /// </summary>
        /// <typeparam name="T">The document type.</typeparam>
        /// <param name="documents">Documents to index.</param>
        /// <param name="indexName">Name of the target index.</param>
        /// <param name="batchSize">Number of documents per bulk request.</param>
        /// <exception cref="Exception">
        /// Rethrows, on the calling thread, whatever exception the bulk observable reported.
        /// </exception>
        public void BulkAll<T>(IEnumerable<T> documents, string indexName, int batchSize) where T : class
        {
            // Enumerate the input exactly once: counting and then indexing the raw IEnumerable would walk it
            // twice, which is wasteful for deferred queries and wrong for one-shot sequences.
            var docs = documents as ICollection<T> ?? documents.ToList();

            var count = 0;
            var elementCount = docs.Count;
            var timer = Stopwatch.StartNew();
            Exception bulkAllException = null;

            using (var waitHandle = new ManualResetEvent(false))
            {
                var bulkAll = _client.BulkAll(docs, b => b
                    .Index(indexName)
                    .BackOffRetries(15)
                    .BackOffTime(TimeSpan.FromSeconds(30))
                    .RefreshOnCompleted(true)
                    .MaxDegreeOfParallelism(4)
                    .Size(batchSize));

                bulkAll.Subscribe(new BulkAllObserver(
                    b =>
                    {
                        // With a degree of parallelism above 1 this callback may run concurrently,
                        // so advance the counter atomically and clamp only the value being reported.
                        var indexed = Math.Min(Interlocked.Add(ref count, batchSize), elementCount);
                        _logger.Debug($"Indexed group of Document: {indexed} of {elementCount}");
                    },
                    e =>
                    {
                        _logger.Error(e, e.Message);
                        bulkAllException = e;

                        waitHandle.Set();
                    },
                    () =>
                    {
                        waitHandle.Set();
                    }));

                // Block until onError or onCompleted has signalled.
                waitHandle.WaitOne();
            }

            if (bulkAllException != null)
            {
                // Rethrow the same exception object without resetting its original stack trace.
                System.Runtime.ExceptionServices.ExceptionDispatchInfo.Capture(bulkAllException).Throw();
            }

            SendLog(null, null, timer.ElapsedMilliseconds, "Bulk completed for Document");
        }
示例#6
0
        /// <summary>
        /// Bulk-indexes <paramref name="documents"/> into <paramref name="indexName"/> in batches of 1000, blocks
        /// until the bulk observable completes or fails, and then logs the elapsed time via <c>SendLog</c>.
        /// </summary>
        /// <param name="documents">Provider documents to index.</param>
        /// <param name="indexName">Name of the target index.</param>
        /// <exception cref="Exception">
        /// Rethrows, on the calling thread, whatever exception the bulk observable reported.
        /// </exception>
        public void BulkAll(List<RoatpProviderDocument> documents, string indexName)
        {
            const int batchSize = 1000;

            var count = 0;
            var elementCount = documents.Count;
            var timer = Stopwatch.StartNew();
            Exception bulkAllException = null;

            using (var waitHandle = new CountdownEvent(1))
            {
                var bulkAll = _client.BulkAll(documents, b => b
                    .Index(indexName)
                    .BackOffRetries(15)
                    .BackOffTime(TimeSpan.FromSeconds(30))
                    .RefreshOnCompleted(true)
                    .MaxDegreeOfParallelism(4)
                    .Size(batchSize));

                bulkAll.Subscribe(observer: new BulkAllObserver(
                    onNext: b =>
                    {
                        // With a degree of parallelism above 1 this callback may run concurrently,
                        // so advance the counter atomically and clamp only the value being reported.
                        var indexed = Math.Min(Interlocked.Add(ref count, batchSize), elementCount);

                        _logger.Debug($"Indexed group of RoatpProviderDocument: {indexed} of {elementCount}");
                    },
                    onError: e =>
                    {
                        _logger.Error(e, e.Message);

                        // Throwing from inside the callback would leave the wait handle unsignalled and the
                        // caller blocked in Wait(); record the failure and release the caller instead.
                        bulkAllException = e;
                        waitHandle.Signal();
                    },
                    onCompleted: () =>
                    {
                        waitHandle.Signal();
                    }));

                // Block until onError or onCompleted has signalled.
                waitHandle.Wait();
            }

            if (bulkAllException != null)
            {
                // Surface the failure on the calling thread, keeping the original stack trace.
                System.Runtime.ExceptionServices.ExceptionDispatchInfo.Capture(bulkAllException).Throw();
            }

            SendLog(null, null, timer.ElapsedMilliseconds, "Bulk completed for RoatpProviderDocument");
        }
示例#7
0
        /// <summary>
        /// Deletes the <c>Project</c> index, bulk-indexes the output of <c>MockDataGenerator(100000)</c> in
        /// batches of 1000, then fetches HTTP node stats and passes them to <c>AssertHttpStats</c>.
        /// </summary>
        /// <param name="c">Client used for every request.</param>
        /// <param name="requestsPerIteration">Value forwarded to <c>AssertHttpStats</c>.</param>
        private async Task IndexMockData(IElasticClient c, int requestsPerIteration)
        {
            // CancellationTokenSource is IDisposable; scope it to this call so it is always released.
            using (var tokenSource = new CancellationTokenSource())
            {
                var token = tokenSource.Token;

                await c.Indices.DeleteAsync(Index<Project>(), ct: token);

                var observableBulk = c.BulkAll(MockDataGenerator(100000), f => f
                    .MaxDegreeOfParallelism(10)
                    .BackOffTime(TimeSpan.FromSeconds(10))
                    .BackOffRetries(2)
                    .Size(1000)
                    .RefreshOnCompleted(),
                    token);

                // Drain the observable; individual responses are not inspected.
                await observableBulk.ForEachAsync(x => { }, token);

                var statsRequest = new NodesStatsRequest(NodesStatsMetric.Http);
                var nodeStats = await c.Nodes.StatsAsync(statsRequest, token);

                AssertHttpStats(c, nodeStats, -1, requestsPerIteration);
            }
        }
示例#8
0
        /// <summary>
        /// Timer callback that pushes pending work to Elasticsearch: bulk-indexes everything queued in
        /// <c>_toBeAdded</c>, then deletes every id queued in <c>_toBeDeleted</c>.
        /// </summary>
        /// <param name="source">Event sender (unused).</param>
        /// <param name="ea">Elapsed-event data (unused).</param>
        /// <exception cref="Exception">
        /// Thrown on the calling thread when the bulk observable reports an error; the original error is the
        /// inner exception. Pending deletions are not processed in that case.
        /// </exception>
        private void Flush(object source, ElapsedEventArgs ea)
        {
            // TODO use a sync object to avoid concurrency instead of copy the list and clear...
            if (_toBeAdded.Any())
            {
                var docs = _toBeAdded.ToList();
                _toBeAdded.Clear();

                Exception bulkError = null;

                using (var waitHandle = new CountdownEvent(1))
                {
                    var bulkAll = _elasticClient.BulkAll(docs, b => b
                        .Index(_indexName) /* index */
                        .Type<ZeroTask>()
                        .BackOffRetries(2)
                        .BackOffTime("30s")
                        .RefreshOnCompleted(true)
                        .MaxDegreeOfParallelism(4)
                        .Size(1000));

                    bulkAll.Subscribe(new BulkAllObserver(
                        //onNext: (b) => { Console.Write("."); },
                        onError: e =>
                        {
                            // Throwing from inside the callback would leave the wait handle unsignalled and
                            // this method blocked in Wait(); record the failure and release the wait instead.
                            bulkError = e;
                            waitHandle.Signal();
                        },
                        onCompleted: () => waitHandle.Signal()));

                    // Block until onError or onCompleted has signalled.
                    waitHandle.Wait();
                }

                if (bulkError != null)
                {
                    throw new Exception("There is a problem with ElasticSearch", bulkError);
                }
            }

            if (!_toBeDeleted.Any())
            {
                return;
            }

            var toBeDeleted = _toBeDeleted.ToList();

            _toBeDeleted.Clear();
            foreach (var id in toBeDeleted)
            {
                _elasticClient.Delete<T>(id, d => d.Index(_indexName));
            }
        }
示例#9
0
        /// <summary>
        /// Bulk-indexes the rows in <paramref name="lazyEnumerable"/> and asynchronously waits for the bulk
        /// observable to finish.
        /// </summary>
        /// <param name="lazyEnumerable">
        /// Rows to index. Each row is read for the keys <c>PartitionKey</c> and <c>RowKey</c> (concatenated into
        /// the document id) and the first row of every batch for <c>@timestamp</c> (used to build the index name).
        /// </param>
        /// <param name="mappingName">
        /// Used as the document type, as input to the index name and, when <c>_setPipeline</c> is set, lower-cased
        /// as the pipeline name.
        /// </param>
        /// <returns>The number of bulk responses (pages) observed.</returns>
        private async Task<int> PushAllImpl(IEnumerable<IDictionary<string, string>> lazyEnumerable, string mappingName)
        {
            var seenPages = 0;

            // Run continuations asynchronously so the awaiting code does not execute inline on whichever
            // thread invokes the observer callbacks.
            var tcs = new TaskCompletionSource<int>(TaskCreationOptions.RunContinuationsAsynchronously);

            var observableBulk = _client.BulkAll(lazyEnumerable, bulkDescriptor =>
            {
                bulkDescriptor
                    .BufferToBulk((x, batch) => x
                        .IndexMany(batch, (bd, d) => bd.Id(d["PartitionKey"] + d["RowKey"]))
                        .Index(_indexNamer.BuildName(batch[0]["@timestamp"], mappingName)))
                    .Type(mappingName);

                if (_setPipeline)
                {
                    // The pipeline name is an identifier, not display text: lower-case it culture-invariantly
                    // so the result does not depend on the current thread culture.
                    bulkDescriptor.Pipeline(mappingName.ToLowerInvariant());
                }

                return bulkDescriptor
                    .MaxDegreeOfParallelism(5)
                    .Size(_batchSize);
            });

            var observer = new BulkAllObserver(
                onNext: b => Interlocked.Increment(ref seenPages),
                // TrySet* so that an unexpected second terminal notification cannot throw inside a callback.
                onCompleted: () => tcs.TrySetResult(seenPages),
                onError: e =>
                {
                    TheTrace.TraceWarning(e.ToString());
                    tcs.TrySetException(e);
                });

            observableBulk.Subscribe(observer);
            return await tcs.Task.ConfigureAwait(false);
        }
 /// <summary>
 /// Bulk-indexes <paramref name="documents"/> into the "news" index, <paramref name="size"/> items per
 /// request, blocking for up to 15 minutes while the work runs.
 /// </summary>
 /// <typeparam name="T">The document type.</typeparam>
 /// <param name="documents">Documents to index.</param>
 /// <param name="size">Number of items per bulk request.</param>
 /// <param name="indexName">
 /// NOTE(review): currently unused. The index-name resolution and index creation that consumed it are
 /// disabled, and the target index is hard-coded to "news".
 /// </param>
 public async Task StoreBulk<T>(IList<T> documents, int size, string indexName = "") where T : class
 {
     //string index = string.IsNullOrEmpty(indexName) ? GenerateIndexName<T>() : indexName;
     //await CreateIndex<T>(index);
     var maximumRunTime = TimeSpan.FromMinutes(15);

     _elasticClient
         .BulkAll(documents, descriptor => descriptor
             .Index("news")
             .BackOffTime("30s")                                   // pause between retries
             .BackOffRetries(2)                                    // retries attempted after a failure
             .RefreshOnCompleted()                                 // refresh the index when the bulk operation completes
             .MaxDegreeOfParallelism(Environment.ProcessorCount)   // concurrent bulk requests
             .Size(size))                                          // items per bulk request
         // Although the BulkAll calls are asynchronous, this Wait is a blocking operation.
         .Wait(maximumRunTime, response =>
         {
             // do something on each response e.g. write number of batches indexed to console
         });
 }
 /// <summary>
 /// Creates the bulk observable for <c>_documents</c>, configuring the request through
 /// <c>BuildQueryCore</c> with the given <paramref name="index"/> and <c>_refreshOnSave</c>.
 /// </summary>
 /// <param name="client">Client the bulk operation is issued on.</param>
 /// <param name="index">Index value forwarded to <c>BuildQueryCore</c>.</param>
 /// <returns>The observable returned by <c>client.BulkAll</c>.</returns>
 protected override BulkAllObservable<T> ExecuteCore(IElasticClient client, string index)
     => client.BulkAll(
         _documents,
         descriptor => BuildQueryCore(descriptor, index, _refreshOnSave));
示例#12
0
        /// <summary>
        /// Bulk-indexes the users read from <paramref name="usersPath"/> into <c>UsersIndex</c>, then applies a
        /// scripted badge update per entry read from <paramref name="badgesPath"/>.
        /// </summary>
        /// <param name="usersPath">Path handed to <c>StackOverflowData.GetUsers</c>.</param>
        /// <param name="badgesPath">Path handed to <c>StackOverflowData.GetBadgeMetas</c>.</param>
        /// <remarks>
        /// The index refresh interval is set to "-1" before indexing and to "30s" afterwards; the latter now
        /// happens even when indexing fails.
        /// </remarks>
        /// <exception cref="Exception">
        /// Rethrows, on the calling thread, whatever exception either bulk observable reported.
        /// </exception>
        public void IndexUsers(string usersPath, string badgesPath)
        {
            const int size = 1000;

            CreateUsersIndexIfNotExists();

            _client.Indices.UpdateSettings(UsersIndex, u => u
                .IndexSettings(i => i
                    .RefreshInterval("-1")));

            try
            {
                var users = StackOverflowData.GetUsers(usersPath);
                var observableBulk = _client.BulkAll(users, f => f
                    .MaxDegreeOfParallelism(16)
                    .BackOffTime(TimeSpan.FromSeconds(10))
                    .BackOffRetries(2)
                    .Size(size)
                    .RefreshOnCompleted()
                    .Index(UsersIndex));

                RunBulk(observableBulk, "users");

                // update user badges
                var badgeMetas = StackOverflowData.GetBadgeMetas(badgesPath);

                var observableBadgeBulk = _client.BulkAll(badgeMetas, f => f
                    .Index<User>()
                    .MaxDegreeOfParallelism(8)
                    .Size(size)
                    .BufferToBulk((bulk, badges) =>
                    {
                        foreach (var badge in badges)
                        {
                            bulk.Update<User>(u => u
                                .Script(s => s
                                    .Source(@"if (ctx._source.badges == null) { 
                                                    ctx._source.badges = [params.badge]; 
                                                } else if (ctx._source.badges.any(b -> b.name == params.badge.name) == false) { 
                                                    ctx._source.badges.add(params.badge); 
                                                }")
                                    .Params(d => d
                                        .Add("badge", badge.Badge)))
                                .Id(badge.UserId)
                                .RetriesOnConflict(10));
                        }
                    })
                    .RefreshOnCompleted());

                RunBulk(observableBadgeBulk, "badges");
            }
            finally
            {
                // Always restore the refresh interval, so a failed run does not leave it at "-1".
                _client.Indices.UpdateSettings(UsersIndex, u => u
                    .IndexSettings(i => i
                        .RefreshInterval("30s")));
            }

            // Subscribes to one bulk observable, logs per-page progress, blocks until it completes or fails,
            // rethrows a reported failure on the calling thread and otherwise logs the elapsed time.
            void RunBulk<TDocument>(BulkAllObservable<TDocument> observable, string label) where TDocument : class
            {
                var seenPages = 0;
                var indexedDocs = 0;
                var totalDocs = 0;
                Exception exception = null;

                using (var handle = new ManualResetEvent(false))
                {
                    var bulkObserver = new BulkAllObserver(
                        onError: e =>
                        {
                            exception = e;
                            handle.Set();
                        },
                        onCompleted: () => handle.Set(),
                        onNext: b =>
                        {
                            Interlocked.Add(ref indexedDocs, b.Items.Count(i => i.IsValid));
                            Interlocked.Add(ref totalDocs, b.Items.Count);
                            Interlocked.Increment(ref seenPages);
                            Log.WriteLine($"indexed {label} page {seenPages}, {indexedDocs} out of {totalDocs}");
                        });

                    var stopWatch = Stopwatch.StartNew();

                    observable.Subscribe(bulkObserver);
                    handle.WaitOne();

                    if (exception != null)
                    {
                        // Rethrow the same exception object without resetting its original stack trace.
                        System.Runtime.ExceptionServices.ExceptionDispatchInfo.Capture(exception).Throw();
                    }

                    Log.WriteLine($"Time taken to index {label}: {stopWatch.Elapsed}");
                }
            }
        }